import pacemaker-2.0.3-3.el8

This commit is contained in:
CentOS Sources 2020-01-21 17:24:07 -05:00 committed by Stepan Oksanichenko
parent 98ca009dce
commit ed303ba18c
10 changed files with 133 additions and 1883 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
SOURCES/nagios-agents-metadata-105ab8a.tar.gz SOURCES/nagios-agents-metadata-105ab8a.tar.gz
SOURCES/pacemaker-744a30d.tar.gz SOURCES/pacemaker-4b1f869.tar.gz

View File

@ -1,2 +1,2 @@
ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz
98d783c49fa894c5bdc30f907f5355539030578d SOURCES/pacemaker-744a30d.tar.gz dfd19e7ec7aa96520f4948fc37d48ea69835bbdb SOURCES/pacemaker-4b1f869.tar.gz

View File

@ -1,284 +0,0 @@
From 66e5e4d83e90be3cecab7bf5f50d0e10fbaa7cea Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Fri, 26 Apr 2019 11:52:59 +0200
Subject: [PATCH 1/3] Fix: libcrmcommon: correctly apply XML diffs with
multiple move/create changes
Given a resource group:
```
<group id="dummies">
<primitive id="dummy0"/>
<primitive id="dummy1"/>
<primitive id="dummy2"/>
<primitive id="dummy3"/>
<primitive id="dummy4"/>
</group>
```
, if we'd like to change it to:
```
<group id="dummies">
<primitive id="dummy3"/>
<primitive id="dummy4"/>
<primitive id="dummy2"/>
<primitive id="dummy0"/>
<primitive id="dummy1"/>
</group>
```
, the generated XML diff would be like:
```
<diff format="2">
<change operation="move" path="//primitive[@id=dummy3]" position="0"/>
<change operation="move" path="//primitive[@id=dummy4]" position="1"/>
<change operation="move" path="//primitive[@id=dummy0]" position="3"/>
<change operation="move" path="//primitive[@id=dummy1]" position="4"/>
</diff>
```
Previously after applying the XML diff, the resulting XML would be a mess:
```
<group id="dummies">
<primitive id="dummy3"/>
<primitive id="dummy4"/>
<primitive id="dummy0"/>
<primitive id="dummy2"/>
<primitive id="dummy1"/>
</group>
```
It's because the positions of the already moved XML objects could be
affected by the later moved objects.
This commit fixes it by temporarily putting "move" objects after the
last sibling and also delaying the adding of any "create" objects, then
placing them to the target positions in the right order.
---
lib/common/xml.c | 126 ++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 97 insertions(+), 29 deletions(-)
diff --git a/lib/common/xml.c b/lib/common/xml.c
index 66b5f66..d815a48 100644
--- a/lib/common/xml.c
+++ b/lib/common/xml.c
@@ -1466,11 +1466,40 @@ __xml_find_path(xmlNode *top, const char *key, int target_position)
return target;
}
+typedef struct xml_change_obj_s {
+ xmlNode *change;
+ xmlNode *match;
+} xml_change_obj_t;
+
+static gint
+sort_change_obj_by_position(gconstpointer a, gconstpointer b)
+{
+ const xml_change_obj_t *change_obj_a = a;
+ const xml_change_obj_t *change_obj_b = b;
+ int position_a = -1;
+ int position_b = -1;
+
+ crm_element_value_int(change_obj_a->change, XML_DIFF_POSITION, &position_a);
+ crm_element_value_int(change_obj_b->change, XML_DIFF_POSITION, &position_b);
+
+ if (position_a < position_b) {
+ return -1;
+
+ } else if (position_a > position_b) {
+ return 1;
+ }
+
+ return 0;
+}
+
static int
xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
{
int rc = pcmk_ok;
xmlNode *change = NULL;
+ GListPtr change_objs = NULL;
+ GListPtr gIter = NULL;
+
for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
xmlNode *match = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
@@ -1482,6 +1511,7 @@ xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
continue;
}
+ // "delete" changes for XML comments are generated with "position"
if(strcmp(op, "delete") == 0) {
crm_element_value_int(change, XML_DIFF_POSITION, &position);
}
@@ -1497,7 +1527,71 @@ xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
rc = -pcmk_err_diff_failed;
continue;
- } else if(strcmp(op, "create") == 0) {
+ } else if (strcmp(op, "create") == 0 || strcmp(op, "move") == 0) {
+ // Delay the adding of a "create" object
+ xml_change_obj_t *change_obj = calloc(1, sizeof(xml_change_obj_t));
+
+ CRM_ASSERT(change_obj != NULL);
+
+ change_obj->change = change;
+ change_obj->match = match;
+
+ change_objs = g_list_append(change_objs, change_obj);
+
+ if (strcmp(op, "move") == 0) {
+ // Temporarily put the "move" object after the last sibling
+ if (match->parent != NULL && match->parent->last != NULL) {
+ xmlAddNextSibling(match->parent->last, match);
+ }
+ }
+
+ } else if(strcmp(op, "delete") == 0) {
+ free_xml(match);
+
+ } else if(strcmp(op, "modify") == 0) {
+ xmlAttr *pIter = pcmk__first_xml_attr(match);
+ xmlNode *attrs = __xml_first_child(first_named_child(change, XML_DIFF_RESULT));
+
+ if(attrs == NULL) {
+ rc = -ENOMSG;
+ continue;
+ }
+ while(pIter != NULL) {
+ const char *name = (const char *)pIter->name;
+
+ pIter = pIter->next;
+ xml_remove_prop(match, name);
+ }
+
+ for (pIter = pcmk__first_xml_attr(attrs); pIter != NULL; pIter = pIter->next) {
+ const char *name = (const char *)pIter->name;
+ const char *value = crm_element_value(attrs, name);
+
+ crm_xml_add(match, name, value);
+ }
+
+ } else {
+ crm_err("Unknown operation: %s", op);
+ }
+ }
+
+ // Changes should be generated in the right order. Double checking.
+ change_objs = g_list_sort(change_objs, sort_change_obj_by_position);
+
+ for (gIter = change_objs; gIter; gIter = gIter->next) {
+ xml_change_obj_t *change_obj = gIter->data;
+ xmlNode *match = change_obj->match;
+ const char *op = NULL;
+ const char *xpath = NULL;
+
+ change = change_obj->change;
+
+ op = crm_element_value(change, XML_DIFF_OP);
+ xpath = crm_element_value(change, XML_DIFF_PATH);
+
+ crm_trace("Continue performing %s on %s with %p", op, xpath, match);
+
+ if(strcmp(op, "create") == 0) {
int position = 0;
xmlNode *child = NULL;
xmlNode *match_child = NULL;
@@ -1565,36 +1659,10 @@ xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
match->name, ID(match), __xml_offset(match), position, match->prev);
rc = -pcmk_err_diff_failed;
}
-
- } else if(strcmp(op, "delete") == 0) {
- free_xml(match);
-
- } else if(strcmp(op, "modify") == 0) {
- xmlAttr *pIter = pcmk__first_xml_attr(match);
- xmlNode *attrs = __xml_first_child(first_named_child(change, XML_DIFF_RESULT));
-
- if(attrs == NULL) {
- rc = -ENOMSG;
- continue;
- }
- while(pIter != NULL) {
- const char *name = (const char *)pIter->name;
-
- pIter = pIter->next;
- xml_remove_prop(match, name);
- }
-
- for (pIter = pcmk__first_xml_attr(attrs); pIter != NULL; pIter = pIter->next) {
- const char *name = (const char *)pIter->name;
- const char *value = crm_element_value(attrs, name);
-
- crm_xml_add(match, name, value);
- }
-
- } else {
- crm_err("Unknown operation: %s", op);
}
}
+
+ g_list_free_full(change_objs, free);
return rc;
}
--
1.8.3.1
From f8d008d8d3a29900ee0c6decbb71a243fa4c2d8c Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Tue, 30 Apr 2019 00:15:03 +0200
Subject: [PATCH 2/3] Fix: libcrmcommon: avoid possible use-of-NULL when
applying XML diffs
---
lib/common/xml.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/common/xml.c b/lib/common/xml.c
index d815a48..fe87de6 100644
--- a/lib/common/xml.c
+++ b/lib/common/xml.c
@@ -1506,11 +1506,12 @@ xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
const char *xpath = crm_element_value(change, XML_DIFF_PATH);
int position = -1;
- crm_trace("Processing %s %s", change->name, op);
if(op == NULL) {
continue;
}
+ crm_trace("Processing %s %s", change->name, op);
+
// "delete" changes for XML comments are generated with "position"
if(strcmp(op, "delete") == 0) {
crm_element_value_int(change, XML_DIFF_POSITION, &position);
--
1.8.3.1
From e6b2bf0cf7e7ed839583d529b190a7a6cd1bd594 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Tue, 30 Apr 2019 00:19:46 +0200
Subject: [PATCH 3/3] Fix: libcrmcommon: return error when applying XML diffs
containing unknown operations
---
lib/common/xml.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/common/xml.c b/lib/common/xml.c
index fe87de6..940c4b9 100644
--- a/lib/common/xml.c
+++ b/lib/common/xml.c
@@ -1573,6 +1573,7 @@ xml_apply_patchset_v2(xmlNode *xml, xmlNode *patchset)
} else {
crm_err("Unknown operation: %s", op);
+ rc = -pcmk_err_diff_failed;
}
}
--
1.8.3.1

View File

@ -1,273 +0,0 @@
From 5470f1d9c776dbf753e015fa96153b6a63c17b83 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Thu, 9 May 2019 13:24:35 +0200
Subject: [PATCH] Fix: controller: confirm cancel of failed monitors
Usually after a monitor has been cancelled from executor, controller
erases the corresponding lrm_rsc_op from the cib, and DC will confirm
the cancel action by process_op_deletion() according to the cib diff.
But if a monitor has failed, the lrm_rsc_op will be recorded as
"last_failure". When cancelling it, the lrm_rsc_op won't get erased from
the cib given the logic on purpose in erase_lrm_history_by_op(), so the
cancel action won't have a chance to get confirmed by DC with
process_op_deletion().
Previously cluster transition would get stuck waiting for the remaining
action timer to time out.
This commit fixes the issue by directly acknowledging the cancel action
in this case and enabling DC to be able to confirm it.
This also moves get_node_id() function into controld_utils.c for common
use.
Producer:
```
# Insert a 10s sleep in the monitor action of RA
# /usr/lib/ocf/resource.d/pacemaker/Stateful:
stateful_monitor() {
+ sleep 10
stateful_check_state "master"
# Add a promotable clone resource:
crm configure primitive stateful ocf:pacemaker:Stateful \
op monitor interval=5 role=Master \
op monitor interval=10 role=Slave
crm configure clone p-clone stateful \
meta promotable=true
# Wait for the resource instance to be started, promoted to be master,
# and monitor for master role to complete.
# Set is-managed=false for the promotable clone:
crm_resource --meta -p is-managed -v false -r p-clone
# Change the status of the master instance to be slave and immediately
# enforce refresh of it:
echo slave > /var/run/Stateful-stateful.state; crm_resource --refresh -r stateful --force
# Wait for probe to complete, and then monitor for slave role to be
# issued:
sleep 15
# While the monitor for slave role is still in progress, change the
# status to be master again:
echo master > /var/run/Stateful-stateful.state
# The monitor for slave role returns error. Cluster issues monitor for
# master role instead and tries to cancel the failed one for slave role.
# But cluster transition gets stuck. Depending on the monitor timeout
# configured for the slave role plus cluster-delay, only after that
# controller eventually says:
pacemaker-controld[21205] error: Node opensuse150 did not send cancel result (via controller) within 20000ms (action timeout plus cluster-delay)
pacemaker-controld[21205] error: [Action 1]: In-flight rsc op stateful_monitor_10000 on opensuse150 (priority: 0, waiting: none)
pacemaker-controld[21205] notice: Transition 6 aborted: Action lost
```
---
daemons/controld/controld_execd.c | 38 ++++++++++++++++++++++++++++++++
daemons/controld/controld_te_callbacks.c | 21 ++----------------
daemons/controld/controld_te_events.c | 32 +++++++++++++++++++++++++++
daemons/controld/controld_transition.h | 1 +
daemons/controld/controld_utils.c | 13 +++++++++++
daemons/controld/controld_utils.h | 2 ++
6 files changed, 88 insertions(+), 19 deletions(-)
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 976fed1..8282fed 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -2476,6 +2476,30 @@ unescape_newlines(const char *string)
return ret;
}
+static bool
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
+ const char * op_type, guint interval_ms)
+{
+ rsc_history_t *entry = NULL;
+
+ CRM_CHECK(lrm_state != NULL, return FALSE);
+ CRM_CHECK(rsc_id != NULL, return FALSE);
+ CRM_CHECK(op_type != NULL, return FALSE);
+
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
+ if (entry == NULL || entry->failed == NULL) {
+ return FALSE;
+ }
+
+ if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
+ && safe_str_eq(entry->failed->op_type, op_type)
+ && entry->failed->interval_ms == interval_ms) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
void
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
struct recurring_op_s *pending, xmlNode *action_xml)
@@ -2605,6 +2629,20 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
erase_lrm_history_by_op(lrm_state, op);
}
+ /* If the recurring operation had failed, the lrm_rsc_op is recorded as
+ * "last_failure" which won't get erased from the cib given the logic on
+ * purpose in erase_lrm_history_by_op(). So that the cancel action won't
+ * have a chance to get confirmed by DC with process_op_deletion().
+ * Cluster transition would get stuck waiting for the remaining action
+ * timer to time out.
+ *
+ * Directly acknowledge the cancel operation in this case.
+ */
+ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
+ pending->op_type, pending->interval_ms)) {
+ need_direct_ack = TRUE;
+ }
+
} else if (op->rsc_deleted) {
/* This recurring operation was cancelled (but not by us, and the
* executor does not have resource information, likely due to resource
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
index 51d908e..22b5f4b 100644
--- a/daemons/controld/controld_te_callbacks.c
+++ b/daemons/controld/controld_te_callbacks.c
@@ -32,19 +32,6 @@ static unsigned long int stonith_max_attempts = 10;
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
-static const char *
-get_node_id(xmlNode * rsc_op)
-{
- xmlNode *node = rsc_op;
-
- while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
- node = node->parent;
- }
-
- CRM_CHECK(node != NULL, return NULL);
- return ID(node);
-}
-
void
update_stonith_max_attempts(const char* value)
{
@@ -374,12 +361,8 @@ process_op_deletion(const char *xpath, xmlNode *change)
node_uuid = extract_node_uuid(xpath);
cancel = get_cancel_action(key, node_uuid);
if (cancel) {
- crm_info("Cancellation of %s on %s confirmed (%d)",
- key, node_uuid, cancel->id);
- stop_te_timer(cancel->timer);
- te_action_confirmed(cancel);
- update_graph(transition_graph, cancel);
- trigger_graph();
+ confirm_cancel_action(cancel);
+
} else {
abort_transition(INFINITY, tg_restart, "Resource operation removal",
change);
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index c0d096f..b7b48a4 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -355,6 +355,27 @@ get_cancel_action(const char *id, const char *node)
return NULL;
}
+void
+confirm_cancel_action(crm_action_t *cancel)
+{
+ const char *op_key = NULL;
+ const char *node_name = NULL;
+
+ CRM_ASSERT(cancel != NULL);
+
+ op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
+ node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
+
+ stop_te_timer(cancel->timer);
+ te_action_confirmed(cancel);
+ update_graph(transition_graph, cancel);
+
+ crm_info("Cancellation of %s on %s confirmed (action %d)",
+ op_key, node_name, cancel->id);
+
+ trigger_graph();
+}
+
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
@@ -471,6 +492,17 @@ process_graph_event(xmlNode *event, const char *event_node)
/* Recurring actions have the transition number they were first
* scheduled in.
*/
+
+ if (status == PCMK_LRM_OP_CANCELLED) {
+ const char *node_id = get_node_id(event);
+
+ action = get_cancel_action(id, node_id);
+ if (action) {
+ confirm_cancel_action(action);
+ }
+ goto bail;
+ }
+
desc = "arrived after initial scheduling";
abort_transition(INFINITY, tg_restart, "Change in recurring result",
event);
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
index 0a33599..a162f99 100644
--- a/daemons/controld/controld_transition.h
+++ b/daemons/controld/controld_transition.h
@@ -25,6 +25,7 @@ void execute_stonith_cleanup(void);
/* tengine */
extern crm_action_t *match_down_event(const char *target);
extern crm_action_t *get_cancel_action(const char *id, const char *node);
+void confirm_cancel_action(crm_action_t *cancel);
void controld_record_action_timeout(crm_action_t *action);
extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node);
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
index ca7e15d..35922f0 100644
--- a/daemons/controld/controld_utils.c
+++ b/daemons/controld/controld_utils.c
@@ -1073,3 +1073,16 @@ feature_set_compatible(const char *dc_version, const char *join_version)
// DC's minor version must be the same or older
return dc_v <= join_v;
}
+
+const char *
+get_node_id(xmlNode *lrm_rsc_op)
+{
+ xmlNode *node = lrm_rsc_op;
+
+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
+ node = node->parent;
+ }
+
+ CRM_CHECK(node != NULL, return NULL);
+ return ID(node);
+}
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index 2a92db5..68992f5 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -95,6 +95,8 @@ unsigned int cib_op_timeout(void);
bool feature_set_compatible(const char *dc_version, const char *join_version);
bool controld_action_is_recordable(const char *action);
+const char *get_node_id(xmlNode *lrm_rsc_op);
+
/* Convenience macro for registering a CIB callback
* (assumes that data can be freed with free())
*/
--
1.8.3.1

View File

@ -1,652 +0,0 @@
From 0a884f325e1049febc28bf0419ab307dd0bce5af Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 16 May 2019 20:04:57 -0500
Subject: [PATCH] Log: various: improve fencer connection messages
Previously, log messages around fencer connections were inconsistent.
This attempts to make them more consistent by: having stonith_api_signon() log
only at debug level, letting the callers log at a level appropriate to the
situation using the return code; functionizing retrying a connection; and
using similar wording across clients.
This also does a bit of refactoring for better error checking and improved
efficiency.
---
daemons/controld/controld_control.c | 7 +-
daemons/controld/controld_te_utils.c | 59 ++++++-----
daemons/execd/pacemaker-execd.c | 28 ++---
daemons/fenced/cts-fence-helper.c | 38 +++----
include/crm/stonith-ng.h | 4 +
lib/fencing/st_client.c | 195 ++++++++++++++++++++---------------
tools/crm_mon.c | 1 -
tools/stonith_admin.c | 29 +-----
8 files changed, 181 insertions(+), 180 deletions(-)
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 89b5b5d..6d9f335 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -628,10 +628,11 @@ do_started(long long action,
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
+ // Try connecting to fencer (retrying later in mainloop if failed)
if (stonith_reconnect == NULL) {
- int dummy;
-
- stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy);
+ stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW,
+ te_connect_stonith,
+ GINT_TO_POINTER(TRUE));
}
set_bit(fsa_input_register, R_ST_REQUIRED);
mainloop_set_trigger(stonith_reconnect);
diff --git a/daemons/controld/controld_te_utils.c b/daemons/controld/controld_te_utils.c
index 5606ed6..22f83ad 100644
--- a/daemons/controld/controld_te_utils.c
+++ b/daemons/controld/controld_te_utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
@@ -385,10 +385,18 @@ te_trigger_stonith_history_sync(void)
mainloop_timer_start(stonith_history_sync_timer);
}
+/*!
+ * \brief Connect to fencer
+ *
+ * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop
+ *
+ * \return TRUE
+ * \note If user_data is NULL, this will wait 2s between attempts, for up to
+ * 30 attempts, meaning the controller could be blocked as long as 58s.
+ */
gboolean
te_connect_stonith(gpointer user_data)
{
- int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
@@ -396,42 +404,41 @@ te_connect_stonith(gpointer user_data)
}
if (stonith_api->state != stonith_disconnected) {
- crm_trace("Still connected");
+ crm_trace("Already connected to fencer, no need to retry");
return TRUE;
}
- for (lpc = 0; lpc < 30; lpc++) {
- crm_debug("Attempting connection to fencing daemon...");
-
- sleep(1);
- rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
-
- if (rc == pcmk_ok) {
- break;
+ if (user_data == NULL) {
+ // Blocking (retry failures now until successful)
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 30 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
-
- if (user_data != NULL) {
+ } else {
+ // Non-blocking (retry failures later in main loop)
+ rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
+ if (rc != pcmk_ok) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
- crm_err("Sign-in failed: triggered a retry");
+ crm_err("Fencer connection failed (will retry): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
mainloop_set_trigger(stonith_reconnect);
} else {
- crm_info("Sign-in failed, but no longer required");
+ crm_info("Fencer connection failed (ignoring because no longer required): %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
}
return TRUE;
}
-
- crm_err("Sign-in failed: pausing and trying again in 2s...");
- sleep(1);
}
- CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
- tengine_stonith_connection_destroy);
-
- stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
- tengine_stonith_notify);
-
- crm_trace("Connected");
+ if (rc == pcmk_ok) {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ tengine_stonith_connection_destroy);
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_FENCE,
+ tengine_stonith_notify);
+ }
return TRUE;
}
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index 21bb0ed..e2fdfca 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -65,28 +65,20 @@ get_stonith_connection(void)
stonith_api = NULL;
}
- if (!stonith_api) {
- int rc = 0;
- int tries = 10;
+ if (stonith_api == NULL) {
+ int rc = pcmk_ok;
stonith_api = stonith_api_new();
- do {
- rc = stonith_api->cmds->connect(stonith_api, "pacemaker-execd", NULL);
- if (rc == pcmk_ok) {
- stonith_api->cmds->register_notification(stonith_api,
- T_STONITH_NOTIFY_DISCONNECT,
- stonith_connection_destroy_cb);
- break;
- }
- sleep(1);
- tries--;
- } while (tries);
-
- if (rc) {
- crm_err("Unable to connect to stonith daemon to execute command. error: %s",
- pcmk_strerror(rc));
+ rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10);
+ if (rc != pcmk_ok) {
+ crm_err("Could not connect to fencer in 10 attempts: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
stonith_api_delete(stonith_api);
stonith_api = NULL;
+ } else {
+ stonith_api->cmds->register_notification(stonith_api,
+ T_STONITH_NOTIFY_DISCONNECT,
+ stonith_connection_destroy_cb);
}
}
return stonith_api;
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
index c5ce1ab..4552fc1 100644
--- a/daemons/fenced/cts-fence-helper.c
+++ b/daemons/fenced/cts-fence-helper.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2018 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2009-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
@@ -124,8 +124,10 @@ passive_test(void)
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
-
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
@@ -271,8 +273,10 @@ sanity_tests(void)
int rc = 0;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
-
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, st_callback);
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, st_callback);
st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback);
@@ -295,7 +299,10 @@ standard_dev_test(void)
stonith_key_value_t *params = NULL;
rc = st->cmds->connect(st, crm_system_name, &pollfd.fd);
- crm_debug("Connect: %d", rc);
+ if (rc != pcmk_ok) {
+ stonith_api_delete(st);
+ crm_exit(CRM_EX_DISCONNECT);
+ }
params = stonith_key_value_add(params, "pcmk_host_map", "some-host=pcmk-7 true_1_node1=3,4");
@@ -502,23 +509,12 @@ test_register_async_devices(int check_event)
static void
try_mainloop_connect(int check_event)
{
- int tries = 10;
- int i = 0;
- int rc = 0;
+ int rc = stonith_api_connect_retry(st, crm_system_name, 10);
- for (i = 0; i < tries; i++) {
- rc = st->cmds->connect(st, crm_system_name, NULL);
-
- if (!rc) {
- crm_info("stonith client connection established");
- mainloop_test_done(TRUE);
- return;
- } else {
- crm_info("stonith client connection failed");
- }
- sleep(1);
+ if (rc == pcmk_ok) {
+ mainloop_test_done(TRUE);
+ return;
}
-
crm_err("API CONNECTION FAILURE");
mainloop_test_done(FALSE);
}
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index b7365a9..b640732 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -430,6 +430,10 @@ void stonith_key_value_freeall(stonith_key_value_t * kvp, int keys, int values);
void stonith_history_free(stonith_history_t *history);
+// Convenience functions
+int stonith_api_connect_retry(stonith_t *st, const char *name,
+ int max_attempts);
+
/* Basic helpers that allows nodes to be fenced and the history to be
* queried without mainloop or the caller understanding the full API
*
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 270ef8d..ceee944 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2018 Andrew Beekhof <andrew@beekhof.net>
+ * Copyright 2004-2019 the Pacemaker project contributors
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
@@ -1415,14 +1415,21 @@ static int
stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
{
int rc = pcmk_ok;
- stonith_private_t *native = stonith->st_private;
+ stonith_private_t *native = NULL;
+ const char *display_name = name? name : "client";
static struct ipc_client_callbacks st_callbacks = {
.dispatch = stonith_dispatch_internal,
.destroy = stonith_connection_destroy
};
- crm_trace("Connecting command channel");
+ CRM_CHECK(stonith != NULL, return -EINVAL);
+
+ native = stonith->st_private;
+ CRM_ASSERT(native != NULL);
+
+ crm_debug("Attempting fencer connection by %s with%s mainloop",
+ display_name, (stonith_fd? "out" : ""));
stonith->state = stonith_connected_command;
if (stonith_fd) {
@@ -1432,8 +1439,9 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
if (native->ipc && crm_ipc_connect(native->ipc)) {
*stonith_fd = crm_ipc_get_fd(native->ipc);
} else if (native->ipc) {
- crm_perror(LOG_ERR, "Connection to fencer failed");
- rc = -ENOTCONN;
+ crm_ipc_close(native->ipc);
+ crm_ipc_destroy(native->ipc);
+ native->ipc = NULL;
}
} else {
@@ -1444,11 +1452,8 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
}
if (native->ipc == NULL) {
- crm_debug("Could not connect to the Stonith API");
rc = -ENOTCONN;
- }
-
- if (rc == pcmk_ok) {
+ } else {
xmlNode *reply = NULL;
xmlNode *hello = create_xml_node(NULL, "stonith_command");
@@ -1458,11 +1463,12 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply);
if (rc < 0) {
- crm_perror(LOG_DEBUG, "Couldn't complete registration with the fencing API: %d", rc);
+ crm_debug("Couldn't register with the fencer: %s "
+ CRM_XS " rc=%d", pcmk_strerror(rc), rc);
rc = -ECOMM;
} else if (reply == NULL) {
- crm_err("Did not receive registration reply");
+ crm_debug("Couldn't register with the fencer: no reply");
rc = -EPROTO;
} else {
@@ -1470,18 +1476,23 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
const char *tmp_ticket = crm_element_value(reply, F_STONITH_CLIENTID);
if (safe_str_neq(msg_type, CRM_OP_REGISTER)) {
- crm_err("Invalid registration message: %s", msg_type);
- crm_log_xml_err(reply, "Bad reply");
+ crm_debug("Couldn't register with the fencer: invalid reply type '%s'",
+ (msg_type? msg_type : "(missing)"));
+ crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else if (tmp_ticket == NULL) {
- crm_err("No registration token provided");
- crm_log_xml_err(reply, "Bad reply");
+ crm_debug("Couldn't register with the fencer: no token in reply");
+ crm_log_xml_debug(reply, "Invalid fencer reply");
rc = -EPROTO;
} else {
- crm_trace("Obtained registration token: %s", tmp_ticket);
native->token = strdup(tmp_ticket);
+#if HAVE_MSGFROMIPC_TIMEOUT
+ stonith->call_timeout = MAX_IPC_DELAY;
+#endif
+ crm_debug("Connection to fencer by %s succeeded (registration token: %s)",
+ display_name, native->token);
rc = pcmk_ok;
}
}
@@ -1490,16 +1501,11 @@ stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd)
free_xml(hello);
}
- if (rc == pcmk_ok) {
-#if HAVE_MSGFROMIPC_TIMEOUT
- stonith->call_timeout = MAX_IPC_DELAY;
-#endif
- crm_debug("Connection to fencer successful");
- return pcmk_ok;
+ if (rc != pcmk_ok) {
+ crm_debug("Connection attempt to fencer by %s failed: %s "
+ CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc);
+ stonith->cmds->disconnect(stonith);
}
-
- crm_debug("Connection to fencer failed: %s", pcmk_strerror(rc));
- stonith->cmds->disconnect(stonith);
return rc;
}
@@ -2071,6 +2077,36 @@ stonith_api_new(void)
return new_stonith;
}
+/*!
+ * \brief Make a blocking connection attempt to the fencer
+ *
+ * \param[in,out] st Fencer API object
+ * \param[in] name Client name to use with fencer
+ * \param[in] max_attempts Return error if this many attempts fail
+ *
+ * \return pcmk_ok on success, result of last attempt otherwise
+ */
+int
+stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts)
+{
+ int rc = -EINVAL; // if max_attempts is not positive
+
+ for (int attempt = 1; attempt <= max_attempts; attempt++) {
+ rc = st->cmds->connect(st, name, NULL);
+ if (rc == pcmk_ok) {
+ return pcmk_ok;
+ } else if (attempt < max_attempts) {
+ crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s "
+ CRM_XS " rc=%d",
+ attempt, max_attempts, pcmk_strerror(rc), rc);
+ sleep(2);
+ }
+ }
+ crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ return rc;
+}
+
stonith_key_value_t *
stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value)
{
@@ -2122,85 +2158,78 @@ stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values)
int
stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off)
{
- char *name = NULL;
- const char *action = "reboot";
-
- int rc = -EPROTO;
- stonith_t *st = NULL;
- enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
+ int rc = pcmk_ok;
+ stonith_t *st = stonith_api_new();
+ const char *action = off? "off" : "reboot";
api_log_open();
- st = stonith_api_new();
- if (st) {
- rc = st->cmds->connect(st, "stonith-api", NULL);
- if(rc != pcmk_ok) {
- api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
- }
+ if (st == NULL) {
+ api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s",
+ action, nodeid, uname);
+ return -EPROTO;
}
- if (uname != NULL) {
- name = strdup(uname);
-
- } else if (nodeid > 0) {
- opts |= st_opt_cs_nodeid;
- name = crm_itoa(nodeid);
- }
-
- if (off) {
- action = "off";
- }
-
- if (rc == pcmk_ok) {
+ rc = st->cmds->connect(st, "stonith-api", NULL);
+ if (rc != pcmk_ok) {
+ api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)",
+ action, nodeid, uname, pcmk_strerror(rc), rc);
+ } else {
+ char *name = NULL;
+ enum stonith_call_options opts = st_opt_sync_call | st_opt_allow_suicide;
+
+ if (uname != NULL) {
+ name = strdup(uname);
+ } else if (nodeid > 0) {
+ opts |= st_opt_cs_nodeid;
+ name = crm_itoa(nodeid);
+ }
rc = st->cmds->fence(st, opts, name, action, timeout, 0);
- if(rc != pcmk_ok) {
- api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc);
+ free(name);
+
+ if (rc != pcmk_ok) {
+ api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)",
+ action, nodeid, uname, pcmk_strerror(rc), rc);
} else {
- api_log(LOG_NOTICE, "Node %u/%s kicked: %s ", nodeid, uname, action);
+ api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action);
}
}
- if (st) {
- st->cmds->disconnect(st);
- stonith_api_delete(st);
- }
-
- free(name);
+ stonith_api_delete(st);
return rc;
}
time_t
stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
{
- int rc = 0;
- char *name = NULL;
-
+ int rc = pcmk_ok;
time_t when = 0;
- stonith_t *st = NULL;
+ stonith_t *st = stonith_api_new();
stonith_history_t *history = NULL, *hp = NULL;
- enum stonith_call_options opts = st_opt_sync_call;
-
- st = stonith_api_new();
- if (st) {
- rc = st->cmds->connect(st, "stonith-api", NULL);
- if(rc != pcmk_ok) {
- api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
- }
- }
-
- if (uname != NULL) {
- name = strdup(uname);
- } else if (nodeid > 0) {
- opts |= st_opt_cs_nodeid;
- name = crm_itoa(nodeid);
+ if (st == NULL) {
+ api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: "
+ "API initialization failed", nodeid, uname);
+ return when;
}
- if (st && rc == pcmk_ok) {
+ rc = st->cmds->connect(st, "stonith-api", NULL);
+ if (rc != pcmk_ok) {
+ api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc);
+ } else {
int entries = 0;
int progress = 0;
int completed = 0;
-
+ char *name = NULL;
+ enum stonith_call_options opts = st_opt_sync_call;
+
+ if (uname != NULL) {
+ name = strdup(uname);
+ } else if (nodeid > 0) {
+ opts |= st_opt_cs_nodeid;
+ name = crm_itoa(nodeid);
+ }
rc = st->cmds->history(st, opts, name, &history, 120);
+ free(name);
for (hp = history; hp; hp = hp->next) {
entries++;
@@ -2227,15 +2256,11 @@ stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress)
}
}
- if (st) {
- st->cmds->disconnect(st);
- stonith_api_delete(st);
- }
+ stonith_api_delete(st);
if(when) {
api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when);
}
- free(name);
return when;
}
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
index e101b62..bed0796 100644
--- a/tools/crm_mon.c
+++ b/tools/crm_mon.c
@@ -298,7 +298,6 @@ cib_connect(gboolean full)
}
if ((fence_connect) && (st->state == stonith_disconnected)) {
- crm_trace("Connecting to stonith");
rc = st->cmds->connect(st, crm_system_name, NULL);
if (rc == pcmk_ok) {
crm_trace("Setting up stonith callbacks");
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index d960fb1..6be66c6 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -198,31 +198,6 @@ struct {
int rc;
} async_fence_data;
-static int
-try_mainloop_connect(void)
-{
- stonith_t *st = async_fence_data.st;
- int tries = 10;
- int i = 0;
- int rc = 0;
-
- for (i = 0; i < tries; i++) {
- crm_debug("Connecting as %s", async_fence_data.name);
- rc = st->cmds->connect(st, async_fence_data.name, NULL);
-
- if (!rc) {
- crm_debug("stonith client connection established");
- return 0;
- } else {
- crm_debug("stonith client connection failed");
- }
- sleep(1);
- }
-
- crm_err("Could not connect to the fencer");
- return -1;
-}
-
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
@@ -251,8 +226,10 @@ async_fence_helper(gpointer user_data)
{
stonith_t *st = async_fence_data.st;
int call_id = 0;
+ int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
- if (try_mainloop_connect()) {
+ if (rc != pcmk_ok) {
+ fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
g_main_loop_quit(mainloop);
return TRUE;
}
--
1.8.3.1

View File

@ -1,49 +0,0 @@
From 463eb8e36e2d2bf10a0e37938e0924ea6699f041 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 30 May 2019 08:37:52 -0500
Subject: [PATCH] Low: libpe_status: offer compile-time option to change
concurrent-fencing default
We most likely want to make concurrent-fencing default to true at some point.
For now, offer that possibility via a compile-time constant, for experimenting.
---
lib/pengine/common.c | 8 +++++++-
lib/pengine/status.c | 3 +++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index 9513633..3a283b4 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -95,7 +95,13 @@ static pe_cluster_option pe_opts[] = {
"How long to wait for the STONITH action (reboot,on,off) to complete", NULL },
{ XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", &check_boolean,
"Enable watchdog integration", "Set automatically by the cluster if SBD is detected. User configured values are ignored." },
- { "concurrent-fencing", NULL, "boolean", NULL, "false", &check_boolean,
+ { "concurrent-fencing", NULL, "boolean", NULL,
+#ifdef DEFAULT_CONCURRENT_FENCING_TRUE
+ "true",
+#else
+ "false",
+#endif
+ &check_boolean,
"Allow performing fencing operations in parallel", NULL },
{ "startup-fencing", NULL, "boolean", NULL, "true", &check_boolean,
"STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" },
diff --git a/lib/pengine/status.c b/lib/pengine/status.c
index 3ccfac4..a8b0947 100644
--- a/lib/pengine/status.c
+++ b/lib/pengine/status.c
@@ -354,6 +354,9 @@ set_working_set_defaults(pe_working_set_t * data_set)
set_bit(data_set->flags, pe_flag_stop_rsc_orphans);
set_bit(data_set->flags, pe_flag_symmetric_cluster);
set_bit(data_set->flags, pe_flag_stop_action_orphans);
+#ifdef DEFAULT_CONCURRENT_FENCING_TRUE
+ set_bit(data_set->flags, pe_flag_concurrent_fencing);
+#endif
}
resource_t *
--
1.8.3.1

View File

@ -1,211 +0,0 @@
From 65170ffd5fa10cbda176b3f88e817d534b6331d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
Date: Wed, 29 Aug 2018 15:49:58 +0200
Subject: [PATCH 1/2] Low: mainloop: make it possible to specify server's
priority in mainloop
---
include/crm/common/mainloop.h | 24 +++++++++++++
lib/common/mainloop.c | 82 +++++++++++++++++++++++++++++++++++++++++--
2 files changed, 103 insertions(+), 3 deletions(-)
diff --git a/include/crm/common/mainloop.h b/include/crm/common/mainloop.h
index 85da1cd..2cfb63e 100644
--- a/include/crm/common/mainloop.h
+++ b/include/crm/common/mainloop.h
@@ -79,6 +79,30 @@ struct ipc_client_callbacks {
qb_ipcs_service_t *mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
struct qb_ipcs_service_handlers *callbacks);
+/*!
+ * \brief Start server-side API end-point, hooked into the internal event loop
+ *
+ * \param[in] name name of the IPC end-point ("address" for the client)
+ * \param[in] type selects libqb's IPC back-end (or use #QB_IPC_NATIVE)
+ * \param[in] callbacks defines libqb's IPC service-level handlers
+ * \param[in] priority priority relative to other events handled in the
+ * abstract handling loop, use #QB_LOOP_MED when unsure
+ *
+ * \return libqb's opaque handle to the created service abstraction
+ *
+ * \note For portability concerns, do not use this function if you keep
+ * \p priority as #QB_LOOP_MED, stick with #mainloop_add_ipc_server
+ * (with exactly such semantics) instead (once you link with this new
+ * symbol employed, you can't downgrade the library freely anymore).
+ *
+ * \note The intended effect will only get fully reflected when run-time
+ * linked to patched libqb: https://github.com/ClusterLabs/libqb/pull/352
+ */
+qb_ipcs_service_t *mainloop_add_ipc_server_with_prio(const char *name,
+ enum qb_ipc_type type,
+ struct qb_ipcs_service_handlers *callbacks,
+ enum qb_loop_priority prio);
+
void mainloop_del_ipc_server(qb_ipcs_service_t * server);
mainloop_io_t *mainloop_add_ipc_client(const char *name, int priority, size_t max_size,
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index 18f7014..17e69f0 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -509,6 +509,65 @@ gio_poll_destroy(gpointer data)
}
}
+/*!
+ * \internal
+ * \brief Convert libqb's poll priority into GLib's one
+ *
+ * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback)
+ *
+ * \return best matching GLib's priority
+ */
+static gint
+conv_prio_libqb2glib(enum qb_loop_priority prio)
+{
+ gint ret = G_PRIORITY_DEFAULT;
+ switch (prio) {
+ case QB_LOOP_LOW:
+ ret = G_PRIORITY_LOW;
+ break;
+ case QB_LOOP_HIGH:
+ ret = G_PRIORITY_HIGH;
+ break;
+ default:
+ crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
+ prio);
+ /* fall-through */
+ case QB_LOOP_MED:
+ break;
+ }
+ return ret;
+}
+
+/*!
+ * \internal
+ * \brief Convert libqb's poll priority to rate limiting spec
+ *
+ * \param[in] prio libqb's poll priority (#QB_LOOP_MED assumed as fallback)
+ *
+ * \return best matching rate limiting spec
+ */
+static enum qb_ipcs_rate_limit
+conv_libqb_prio2ratelimit(enum qb_loop_priority prio)
+{
+ /* this is an inversion of what libqb's qb_ipcs_request_rate_limit does */
+ enum qb_ipcs_rate_limit ret = QB_IPCS_RATE_NORMAL;
+ switch (prio) {
+ case QB_LOOP_LOW:
+ ret = QB_IPCS_RATE_SLOW;
+ break;
+ case QB_LOOP_HIGH:
+ ret = QB_IPCS_RATE_FAST;
+ break;
+ default:
+ crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
+ prio);
+ /* fall-through */
+ case QB_LOOP_MED:
+ break;
+ }
+ return ret;
+}
+
static int32_t
gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts,
void *data, qb_ipcs_dispatch_fn_t fn, int32_t add)
@@ -555,8 +614,8 @@ gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts,
adaptor->p = p;
adaptor->is_used++;
adaptor->source =
- g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor,
- gio_poll_destroy);
+ g_io_add_watch_full(channel, conv_prio_libqb2glib(p), evts,
+ gio_read_socket, adaptor, gio_poll_destroy);
/* Now that mainloop now holds a reference to channel,
* thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
@@ -640,7 +699,15 @@ pick_ipc_type(enum qb_ipc_type requested)
qb_ipcs_service_t *
mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
- struct qb_ipcs_service_handlers * callbacks)
+ struct qb_ipcs_service_handlers *callbacks)
+{
+ return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED);
+}
+
+qb_ipcs_service_t *
+mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type,
+ struct qb_ipcs_service_handlers *callbacks,
+ enum qb_loop_priority prio)
{
int rc = 0;
qb_ipcs_service_t *server = NULL;
@@ -652,6 +719,15 @@ mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
crm_client_init();
server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
+ if (server == NULL) {
+ crm_err("Could not create %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
+ return NULL;
+ }
+
+ if (prio != QB_LOOP_MED) {
+ qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio));
+ }
+
#ifdef HAVE_IPCS_GET_BUFFER_SIZE
/* All clients should use at least ipc_buffer_max as their buffer size */
qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size());
--
1.8.3.1
From 3401f25994e8cc059898550082f9b75f2d07f103 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Pokorn=C3=BD?= <jpokorny@redhat.com>
Date: Wed, 29 Aug 2018 15:50:57 +0200
Subject: [PATCH 2/2] High: stonith-ng's function cannot be blocked with CIB
updates forever
In the high-load (or high-rate-config-change) scenarios,
pacemaker-fenced would be unable to provide service when basically DoS'd
with CIB update notifications. Try to reconcile that with elevated
priority of the server's proper listening interface in the mainloop, at
worst, it will try to fence with slightly outdated config, but appears
to be less bad than not carrying the execution at all, for instance.
Other daemons might be considered as well.
Prerequisites:
- https://github.com/ClusterLabs/libqb/pull/352
(libqb used to contain a bug due to which one particular step in the
initial-client-connection-accepting-at-the-server procedure that would
be carried out with hard-coded (and hence possibly lower than competing
events') priority, which backfires exactly in this case (once the
pacemaker part is fixed -- by the means of elevating priority for
the API end-point of fenced so that it won't get consistently
overridden with a non-socket-based event source/trigger)
How to verify:
- mocked/based -N (see commit adding that module to mocked based daemon)
---
lib/common/utils.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/common/utils.c b/lib/common/utils.c
index 758eb1b..d1c3e26 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -1031,7 +1031,8 @@ attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers
void
stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb)
{
- *ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, cb);
+ *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb,
+ QB_LOOP_HIGH);
if (*ipcs == NULL) {
crm_err("Failed to create fencer: exiting and inhibiting respawn.");
--
1.8.3.1

View File

@ -1,233 +0,0 @@
From 169d424cf88594f15e7e66baa705df6b727aa807 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 4 Jun 2019 16:24:16 -0500
Subject: [PATCH 1/4] Log: pacemaker-remoted: use different default log if pid
1
When pacemaker-remoted runs as pid 1 inside a container, there may not be a
/var/log/pacemaker directory. To get around this, use a default log of
/var/log/pcmk-init.log when running as pid 1.
This was chosen over alternatives (creating the /var/log/pacemaker directory,
or passing the log location as an environment variable when creating the
implicit container resource) because it both avoids forcing a restart of
active bundles due to configuration change (as well as preserving regression
test output) and allows users to configure an explicit log location via the
container image or the bundle's extra arguments.
---
daemons/execd/pacemaker-execd.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index e2fdfca..cfa5500 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -429,6 +429,14 @@ static void spawn_pidone(int argc, char **argv, char **envp)
return;
}
+ /* Containers can be expected to have /var/log, but they may not have
+ * /var/log/pacemaker, so use a different default if no value has been
+ * explicitly configured in the container's environment.
+ */
+ if (daemon_option("logfile") == NULL) {
+ set_daemon_option("logfile", "/var/log/pcmk-init.log");
+ }
+
sigfillset(&set);
sigprocmask(SIG_BLOCK, &set, 0);
--
1.8.3.1
From 7e362387a092b5617b36a69961115f7703e4d801 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 17 May 2019 12:39:43 -0500
Subject: [PATCH 2/4] Refactor: libpe_status: add enum for bundle mount flags
More readable than 0 or 1
---
lib/pengine/bundle.c | 17 +++++++++--------
lib/pengine/variant.h | 9 ++++++++-
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c
index 3b32f04..b223f03 100644
--- a/lib/pengine/bundle.c
+++ b/lib/pengine/bundle.c
@@ -228,7 +228,7 @@ create_docker_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data,
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
pe__bundle_mount_t *mount = pIter->data;
- if(mount->flags) {
+ if (is_set(mount->flags, pe__bundle_mount_subdir)) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, replica->offset);
@@ -396,7 +396,7 @@ create_podman_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data,
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
pe__bundle_mount_t *mount = pIter->data;
- if(mount->flags) {
+ if (is_set(mount->flags, pe__bundle_mount_subdir)) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, replica->offset);
@@ -562,7 +562,7 @@ create_rkt_resource(pe_resource_t *parent, pe__bundle_variant_data_t *data,
for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) {
pe__bundle_mount_t *mount = pIter->data;
- if(mount->flags) {
+ if (is_set(mount->flags, pe__bundle_mount_subdir)) {
char *source = crm_strdup_printf(
"%s/%s-%d", mount->source, data->prefix, replica->offset);
@@ -894,7 +894,7 @@ create_container(pe_resource_t *parent, pe__bundle_variant_data_t *data,
static void
mount_add(pe__bundle_variant_data_t *bundle_data, const char *source,
- const char *target, const char *options, int flags)
+ const char *target, const char *options, uint32_t flags)
{
pe__bundle_mount_t *mount = calloc(1, sizeof(pe__bundle_mount_t));
@@ -1142,11 +1142,11 @@ pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
const char *source = crm_element_value(xml_child, "source-dir");
const char *target = crm_element_value(xml_child, "target-dir");
const char *options = crm_element_value(xml_child, "options");
- int flags = 0;
+ int flags = pe__bundle_mount_none;
if (source == NULL) {
source = crm_element_value(xml_child, "source-dir-root");
- flags = 1;
+ set_bit(flags, pe__bundle_mount_subdir);
}
if (source && target) {
@@ -1251,9 +1251,10 @@ pe__unpack_bundle(pe_resource_t *rsc, pe_working_set_t *data_set)
* reasonable.
*/
mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION,
- DEFAULT_REMOTE_KEY_LOCATION, NULL, 0);
+ DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none);
- mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL, 1);
+ mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
+ pe__bundle_mount_subdir);
port = calloc(1, sizeof(pe__bundle_port_t));
if(bundle_data->control_port) {
diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h
index f46aa11..7f77eef 100644
--- a/lib/pengine/variant.h
+++ b/lib/pengine/variant.h
@@ -51,11 +51,18 @@ typedef struct {
pe_resource_t *remote;
} pe__bundle_replica_t;
+enum pe__bundle_mount_flags {
+ pe__bundle_mount_none = 0x00,
+
+ // mount instance-specific subdirectory rather than source directly
+ pe__bundle_mount_subdir = 0x01
+};
+
typedef struct {
char *source;
char *target;
char *options;
- int flags;
+ uint32_t flags; // bitmask of pe__bundle_mount_flags
} pe__bundle_mount_t;
typedef struct {
--
1.8.3.1
From 87eac95868930ffda4d964c2b6bd9960b6893cc9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 17 May 2019 14:13:54 -0500
Subject: [PATCH 3/4] Fix: controller: don't check join status after remote
node appears
Only cluster nodes have join state
---
daemons/controld/controld_callbacks.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 06ffb9d..3ce7470 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -228,7 +228,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
- if (appeared && (alive > 0)) {
+ if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
--
1.8.3.1
From 5755b63850a17cd91bca28e83c39119378fe1887 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Sat, 18 May 2019 21:59:00 -0500
Subject: [PATCH 4/4] Doc: Pacemaker Explained: document effect of SELinux on
bundle storage
---
doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
index e431626..4a181df 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt
@@ -999,11 +999,11 @@ association with Docker, Inc. is implied.]
<storage-mapping id="httpd-root"
source-dir="/srv/html"
target-dir="/var/www/html"
- options="rw"/>
+ options="rw,Z"/>
<storage-mapping id="httpd-logs"
source-dir-root="/var/log/pacemaker/bundles"
target-dir="/etc/httpd/logs"
- options="rw"/>
+ options="rw,Z"/>
</storage>
<primitive class="ocf" id="httpd" provider="heartbeat" type="apache"/>
</bundle>
@@ -1293,7 +1293,8 @@ indexterm:[bundle,storage,storage-mapping]
|options
|
-|File system mount options to use when mapping the storage
+|A comma-separated list of file system mount options to use when mapping the
+ storage
indexterm:[options,storage-mapping]
indexterm:[storage-mapping,Property,options]
@@ -1322,6 +1323,14 @@ The +PCMK_authkey_location+ environment variable must not be set to anything
other than the default of `/etc/pacemaker/authkey` on any node in the cluster.
====
+[IMPORTANT]
+====
+If SELinux is used in enforcing mode on the host, you must ensure the container
+is allowed to use any storage you mount into it. For Docker and podman bundles,
+adding "Z" to the mount options will create a container-specific label for the
+mount that allows the container access.
+====
+
=== Bundle Primitive ===
A bundle may optionally contain one +<primitive>+ resource
--
1.8.3.1

View File

@ -1,131 +0,0 @@
From d6e2db2702aa533bca7208bbdc18cb4254cc89d2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jun 2019 14:18:37 -0500
Subject: [PATCH] Test: scheduler: explicitly set concurrent-fencing in
relevant regression tests
... since concurrent-fencing's default is likely to eventually change,
which would otherwise affect the results of these tests
---
cts/scheduler/rec-node-14.xml | 1 +
cts/scheduler/remote-connection-unrecoverable.xml | 1 +
cts/scheduler/remote-recover-all.xml | 1 +
cts/scheduler/remote-recover-no-resources.xml | 1 +
cts/scheduler/remote-recover-unknown.xml | 1 +
cts/scheduler/stonith-4.xml | 1 +
cts/scheduler/suicide-needed-inquorate.xml | 1 +
cts/scheduler/ticket-clone-21.xml | 1 +
cts/scheduler/ticket-clone-9.xml | 1 +
9 files changed, 9 insertions(+)
diff --git a/cts/scheduler/rec-node-14.xml b/cts/scheduler/rec-node-14.xml
index 60307ba..aefa410 100644
--- a/cts/scheduler/rec-node-14.xml
+++ b/cts/scheduler/rec-node-14.xml
@@ -4,6 +4,7 @@
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="nvpair.id21835" name="stonith-enabled" value="true"/>
<nvpair id="nvpair.id21844" name="no-quorum-policy" value="ignore"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
diff --git a/cts/scheduler/remote-connection-unrecoverable.xml b/cts/scheduler/remote-connection-unrecoverable.xml
index df9fee2..efec646 100644
--- a/cts/scheduler/remote-connection-unrecoverable.xml
+++ b/cts/scheduler/remote-connection-unrecoverable.xml
@@ -7,6 +7,7 @@
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
<nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="mycluster"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1459735110"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
diff --git a/cts/scheduler/remote-recover-all.xml b/cts/scheduler/remote-recover-all.xml
index 0ade7cd..1680166 100644
--- a/cts/scheduler/remote-recover-all.xml
+++ b/cts/scheduler/remote-recover-all.xml
@@ -10,6 +10,7 @@
<nvpair id="cib-bootstrap-options-cluster-recheck-interval" name="cluster-recheck-interval" value="60s"/>
<nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1493817755"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
<cluster_property_set id="redis_replication">
<nvpair id="redis_replication-redis_REPL_INFO" name="redis_REPL_INFO" value="controller-0"/>
diff --git a/cts/scheduler/remote-recover-no-resources.xml b/cts/scheduler/remote-recover-no-resources.xml
index 37708bb..602ed2b 100644
--- a/cts/scheduler/remote-recover-no-resources.xml
+++ b/cts/scheduler/remote-recover-no-resources.xml
@@ -10,6 +10,7 @@
<nvpair id="cib-bootstrap-options-cluster-recheck-interval" name="cluster-recheck-interval" value="60s"/>
<nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1493817755"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
<cluster_property_set id="redis_replication">
<nvpair id="redis_replication-redis_REPL_INFO" name="redis_REPL_INFO" value="controller-0"/>
diff --git a/cts/scheduler/remote-recover-unknown.xml b/cts/scheduler/remote-recover-unknown.xml
index f070f11..f47a841 100644
--- a/cts/scheduler/remote-recover-unknown.xml
+++ b/cts/scheduler/remote-recover-unknown.xml
@@ -10,6 +10,7 @@
<nvpair id="cib-bootstrap-options-cluster-recheck-interval" name="cluster-recheck-interval" value="60s"/>
<nvpair id="cib-bootstrap-options-maintenance-mode" name="maintenance-mode" value="false"/>
<nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1493817755"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
<cluster_property_set id="redis_replication">
<nvpair id="redis_replication-redis_REPL_INFO" name="redis_REPL_INFO" value="controller-0"/>
diff --git a/cts/scheduler/stonith-4.xml b/cts/scheduler/stonith-4.xml
index 7979462..dd7af8d 100644
--- a/cts/scheduler/stonith-4.xml
+++ b/cts/scheduler/stonith-4.xml
@@ -4,6 +4,7 @@
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.8-0.772.26fe3e5.git.fc17-26fe3e52d259e4726699300d27991fc1a80c556b"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
diff --git a/cts/scheduler/suicide-needed-inquorate.xml b/cts/scheduler/suicide-needed-inquorate.xml
index e626ea6..f87422b 100644
--- a/cts/scheduler/suicide-needed-inquorate.xml
+++ b/cts/scheduler/suicide-needed-inquorate.xml
@@ -6,6 +6,7 @@
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.1.17-1"/>
<nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="suicide"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
diff --git a/cts/scheduler/ticket-clone-21.xml b/cts/scheduler/ticket-clone-21.xml
index bb1f044..efd5294 100644
--- a/cts/scheduler/ticket-clone-21.xml
+++ b/cts/scheduler/ticket-clone-21.xml
@@ -4,6 +4,7 @@
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
diff --git a/cts/scheduler/ticket-clone-9.xml b/cts/scheduler/ticket-clone-9.xml
index e77210d..c6d5809 100644
--- a/cts/scheduler/ticket-clone-9.xml
+++ b/cts/scheduler/ticket-clone-9.xml
@@ -4,6 +4,7 @@
<cluster_property_set id="cib-bootstrap-options">
<nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="true"/>
<nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
+ <nvpair id="options-concurrent-fencing" name="concurrent-fencing" value="false"/>
</cluster_property_set>
</crm_config>
<nodes>
--
1.8.3.1

View File

@ -5,7 +5,15 @@
%global gname haclient %global gname haclient
## Where to install Pacemaker documentation ## Where to install Pacemaker documentation
%if 0%{?suse_version} > 0
%global pcmk_docdir %{_docdir}/%{name}-%{version}
%else
%if 0%{?rhel} > 7
%global pcmk_docdir %{_docdir}/%{name}-doc %global pcmk_docdir %{_docdir}/%{name}-doc
%else
%global pcmk_docdir %{_docdir}/%{name}
%endif
%endif
## GitHub entity that distributes source (for ease of using a fork) ## GitHub entity that distributes source (for ease of using a fork)
%global github_owner ClusterLabs %global github_owner ClusterLabs
@ -13,12 +21,12 @@
## Upstream pacemaker version, and its package version (specversion ## Upstream pacemaker version, and its package version (specversion
## can be incremented to build packages reliably considered "newer" ## can be incremented to build packages reliably considered "newer"
## than previously built packages with the same pcmkversion) ## than previously built packages with the same pcmkversion)
%global pcmkversion 2.0.2 %global pcmkversion 2.0.3
%global specversion 1 %global specversion 3
## Upstream commit (or git tag, such as "Pacemaker-" plus the ## Upstream commit (or git tag, such as "Pacemaker-" plus the
## {pcmkversion} macro for an official release) to use for this package ## {pcmkversion} macro for an official release) to use for this package
%global commit 744a30d655c9fbd66ad6e103697db0283bb90779 %global commit 4b1f869f0f64ef0d248b6aa4781d38ecccf83318
## Since git v2.11, the extent of abbreviation is autoscaled by default ## Since git v2.11, the extent of abbreviation is autoscaled by default
## (used to be constant of 7), so we need to convey it for non-tags, too. ## (used to be constant of 7), so we need to convey it for non-tags, too.
%global commit_abbrev 7 %global commit_abbrev 7
@ -80,6 +88,43 @@
%define gnutls_priorities %{?pcmk_gnutls_priorities}%{!?pcmk_gnutls_priorities:@SYSTEM} %define gnutls_priorities %{?pcmk_gnutls_priorities}%{!?pcmk_gnutls_priorities:@SYSTEM}
%endif %endif
%if !%{defined _rundir}
%if 0%{?fedora} >= 15 || 0%{?rhel} >= 7 || 0%{?suse_version} >= 1200
%define _rundir /run
%else
%define _rundir /var/run
%endif
%endif
%if 0%{?fedora} > 22 || 0%{?rhel} > 7
%global supports_recommends 1
%endif
## Different distros name certain packages differently
## (note: corosync libraries also differ, but all provide corosync-devel)
%if 0%{?suse_version} > 0
%global pkgname_bzip2_devel libbz2-devel
%global pkgname_docbook_xsl docbook-xsl-stylesheets
%global pkgname_gnutls_devel libgnutls-devel
%global pkgname_shadow_utils shadow
%global pkgname_procps procps
%global pkgname_glue_libs libglue
%global pkgname_pcmk_libs lib%{name}3
%global hacluster_id 90
%else
%global pkgname_libtool_devel libtool-ltdl-devel
%global pkgname_libtool_devel_arch libtool-ltdl-devel%{?_isa}
%global pkgname_bzip2_devel bzip2-devel
%global pkgname_docbook_xsl docbook-style-xsl
%global pkgname_gnutls_devel gnutls-devel
%global pkgname_shadow_utils shadow-utils
%global pkgname_procps procps-ng
%global pkgname_publican publican
%global pkgname_glue_libs cluster-glue-libs
%global pkgname_pcmk_libs %{name}-libs
%global hacluster_id 189
%endif
# Python-related definitions # Python-related definitions
## Use Python 3 on certain platforms if major version not specified ## Use Python 3 on certain platforms if major version not specified
@ -211,22 +256,17 @@ Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{na
Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz
# upstream commits # upstream commits
Patch1: 001-xmldiffs.patch #Patch1: 001-xxx.patch
Patch2: 002-failed-monitors.patch
Patch3: 003-fencer-logs.patch
Patch4: 004-concurrent-fencing.patch
Patch5: 005-glib-priorities.patch
Patch6: 006-bundle-fixes.patch
# downstream-only commits # downstream-only commits
Patch100: 100-concurrent-fencing-tests.patch #Patch100: xxx.patch
Requires: resource-agents Requires: resource-agents
Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release}
Requires: %{name}-cli = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release}
%if !%{defined _unitdir} %if !%{defined _unitdir}
Requires: procps-ng Requires: %{pkgname_procps}
Requires: psmisc Requires: psmisc
%endif %endif
%{?systemd_requires} %{?systemd_requires}
@ -244,14 +284,14 @@ BuildRequires: libqb-devel >= 0.17.0
BuildRequires: coreutils findutils grep sed BuildRequires: coreutils findutils grep sed
# Required for core functionality # Required for core functionality
BuildRequires: automake autoconf gcc libtool pkgconfig libtool-ltdl-devel BuildRequires: automake autoconf gcc libtool pkgconfig %{?pkgname_libtool_devel}
BuildRequires: pkgconfig(glib-2.0) >= 2.16 BuildRequires: pkgconfig(glib-2.0) >= 2.16
BuildRequires: libxml2-devel libxslt-devel libuuid-devel BuildRequires: libxml2-devel libxslt-devel libuuid-devel
BuildRequires: bzip2-devel BuildRequires: %{pkgname_bzip2_devel}
# Enables optional functionality # Enables optional functionality
BuildRequires: ncurses-devel docbook-style-xsl BuildRequires: ncurses-devel %{pkgname_docbook_xsl}
BuildRequires: help2man gnutls-devel pam-devel pkgconfig(dbus-1) BuildRequires: help2man %{pkgname_gnutls_devel} pam-devel pkgconfig(dbus-1)
%if %{systemd_native} %if %{systemd_native}
BuildRequires: pkgconfig(systemd) BuildRequires: pkgconfig(systemd)
@ -261,16 +301,16 @@ BuildRequires: pkgconfig(systemd)
BuildRequires: git BuildRequires: git
Requires: corosync >= 2.0.0 Requires: corosync >= 2.0.0
BuildRequires: corosynclib-devel >= 2.0.0 BuildRequires: corosync-devel >= 2.0.0
%if %{with stonithd} %if %{with stonithd}
BuildRequires: cluster-glue-libs-devel BuildRequires: %{pkgname_glue_libs}-devel
%endif %endif
## (note no avoiding effect when building through non-customized mock) ## (note no avoiding effect when building through non-customized mock)
%if !%{bleeding} %if !%{bleeding}
%if %{with doc} %if %{with doc}
BuildRequires: inkscape asciidoc publican BuildRequires: inkscape asciidoc %{?pkgname_publican}
%endif %endif
%endif %endif
@ -301,12 +341,15 @@ Available rpmbuild rebuild options:
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
Summary: Command line tools for controlling Pacemaker clusters Summary: Command line tools for controlling Pacemaker clusters
Group: System Environment/Daemons Group: System Environment/Daemons
Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
%if 0%{?fedora} > 22 || 0%{?rhel} > 7 %if 0%{?supports_recommends}
#Recommends: pcmk-cluster-manager = %{version}-%{release} #Recommends: pcmk-cluster-manager = %{version}-%{release}
# For crm_report
Requires: tar
Requires: bzip2
%endif %endif
Requires: perl-TimeDate Requires: perl-TimeDate
Requires: procps-ng Requires: %{pkgname_procps}
Requires: psmisc Requires: psmisc
Requires(post):coreutils Requires(post):coreutils
@ -318,27 +361,27 @@ The %{name}-cli package contains command line tools that can be used
to query and control the cluster from machines that may, or may not, to query and control the cluster from machines that may, or may not,
be part of the cluster. be part of the cluster.
%package libs %package -n %{pkgname_pcmk_libs}
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
Summary: Core Pacemaker libraries Summary: Core Pacemaker libraries
Group: System Environment/Daemons Group: System Environment/Daemons
Requires(pre): shadow-utils Requires(pre): %{pkgname_shadow_utils}
Requires: %{name}-schemas = %{version}-%{release} Requires: %{name}-schemas = %{version}-%{release}
# sbd 1.4.0+ supports the libpe_status API for pe_working_set_t # sbd 1.4.0+ supports the libpe_status API for pe_working_set_t
Conflicts: sbd < 1.4.0 Conflicts: sbd < 1.4.0
%description libs %description -n %{pkgname_pcmk_libs}
Pacemaker is an advanced, scalable High-Availability cluster resource Pacemaker is an advanced, scalable High-Availability cluster resource
manager. manager.
The %{name}-libs package contains shared libraries needed for cluster The %{pkgname_pcmk_libs} package contains shared libraries needed for cluster
nodes and those just running the CLI tools. nodes and those just running the CLI tools.
%package cluster-libs %package cluster-libs
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
Summary: Cluster Libraries used by Pacemaker Summary: Cluster Libraries used by Pacemaker
Group: System Environment/Daemons Group: System Environment/Daemons
Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
%description cluster-libs %description cluster-libs
Pacemaker is an advanced, scalable High-Availability cluster resource Pacemaker is an advanced, scalable High-Availability cluster resource
@ -356,11 +399,11 @@ License: GPLv2+ and LGPLv2+ and BSD
%endif %endif
Summary: Pacemaker remote daemon for non-cluster nodes Summary: Pacemaker remote daemon for non-cluster nodes
Group: System Environment/Daemons Group: System Environment/Daemons
Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
Requires: %{name}-cli = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release}
Requires: resource-agents Requires: resource-agents
%if !%{defined _unitdir} %if !%{defined _unitdir}
Requires: procps-ng Requires: %{pkgname_procps}
%endif %endif
# -remote can be fully independent of systemd # -remote can be fully independent of systemd
%{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}} %{?systemd_ordering}%{!?systemd_ordering:%{?systemd_requires}}
@ -375,23 +418,23 @@ The %{name}-remote package contains the Pacemaker Remote daemon
which is capable of extending pacemaker functionality to remote which is capable of extending pacemaker functionality to remote
nodes not running the full corosync/cluster stack. nodes not running the full corosync/cluster stack.
%package libs-devel %package -n %{pkgname_pcmk_libs}-devel
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
Summary: Pacemaker development package Summary: Pacemaker development package
Group: Development/Libraries Group: Development/Libraries
Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release}
Requires: libuuid-devel%{?_isa} libtool-ltdl-devel%{?_isa} Requires: libuuid-devel%{?_isa} %{?pkgname_libtool_devel_arch}
Requires: libxml2-devel%{?_isa} libxslt-devel%{?_isa} Requires: libxml2-devel%{?_isa} libxslt-devel%{?_isa}
Requires: bzip2-devel%{?_isa} glib2-devel%{?_isa} Requires: %{pkgname_bzip2_devel}%{?_isa} glib2-devel%{?_isa}
Requires: libqb-devel%{?_isa} Requires: libqb-devel%{?_isa}
Requires: corosynclib-devel%{?_isa} >= 2.0.0 Requires: corosync-devel >= 2.0.0
%description libs-devel %description -n %{pkgname_pcmk_libs}-devel
Pacemaker is an advanced, scalable High-Availability cluster resource Pacemaker is an advanced, scalable High-Availability cluster resource
manager. manager.
The %{name}-libs-devel package contains headers and shared libraries The %{pkgname_pcmk_libs}-devel package contains headers and shared libraries
for developing tools for Pacemaker. for developing tools for Pacemaker.
%package cts %package cts
@ -399,8 +442,9 @@ License: GPLv2+ and LGPLv2+
Summary: Test framework for cluster-related technologies like Pacemaker Summary: Test framework for cluster-related technologies like Pacemaker
Group: System Environment/Daemons Group: System Environment/Daemons
Requires: %{python_path} Requires: %{python_path}
Requires: %{name}-libs = %{version}-%{release} Requires: %{pkgname_pcmk_libs} = %{version}-%{release}
Requires: procps-ng Requires: %{name}-cli = %{version}-%{release}
Requires: %{pkgname_procps}
Requires: psmisc Requires: psmisc
BuildArch: noarch BuildArch: noarch
@ -472,7 +516,7 @@ monitor resources.
# Early versions of autotools (e.g. RHEL <= 5) do not support --docdir # Early versions of autotools (e.g. RHEL <= 5) do not support --docdir
export docdir=%{pcmk_docdir} export docdir=%{pcmk_docdir}
export systemdunitdir=%{?_unitdir}%{!?_unitdir:no} export systemdsystemunitdir=%{?_unitdir}%{!?_unitdir:no}
# RHEL changes pacemaker's concurrent-fencing default to true # RHEL changes pacemaker's concurrent-fencing default to true
export CPPFLAGS="-DDEFAULT_CONCURRENT_FENCING_TRUE" export CPPFLAGS="-DDEFAULT_CONCURRENT_FENCING_TRUE"
@ -500,8 +544,9 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}"
%{!?with_doc: --with-brand=} \ %{!?with_doc: --with-brand=} \
%{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \ %{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \
--with-initdir=%{_initrddir} \ --with-initdir=%{_initrddir} \
--with-runstatedir=%{_rundir} \
--localstatedir=%{_var} \ --localstatedir=%{_var} \
--with-bug-url=https://bugzilla.redhat.com/ \ --with-bug-url=https://bugzilla.redhat.com/ \
--with-nagios \ --with-nagios \
--with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \ --with-nagios-metadata-dir=%{_datadir}/pacemaker/nagios/plugins-metadata/ \
--with-nagios-plugin-dir=%{_libdir}/nagios/plugins/ \ --with-nagios-plugin-dir=%{_libdir}/nagios/plugins/ \
@ -513,7 +558,7 @@ sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
%endif %endif
make %{_smp_mflags} V=1 all make %{_smp_mflags} V=1
%check %check
{ cts/cts-scheduler --run load-stopped-loop \ { cts/cts-scheduler --run load-stopped-loop \
@ -689,17 +734,17 @@ fi
%systemd_postun_with_restart crm_mon.service %systemd_postun_with_restart crm_mon.service
%endif %endif
%pre libs %pre -n %{pkgname_pcmk_libs}
getent group %{gname} >/dev/null || groupadd -r %{gname} -g 189 getent group %{gname} >/dev/null || groupadd -r %{gname} -g %{hacluster_id}
getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u 189 -s /sbin/nologin -c "cluster user" %{uname} getent passwd %{uname} >/dev/null || useradd -r -g %{gname} -u %{hacluster_id} -s /sbin/nologin -c "cluster user" %{uname}
exit 0 exit 0
%if %{defined ldconfig_scriptlets} %if %{defined ldconfig_scriptlets}
%ldconfig_scriptlets libs %ldconfig_scriptlets libs
%ldconfig_scriptlets cluster-libs %ldconfig_scriptlets cluster-libs
%else %else
%post libs -p /sbin/ldconfig %post -n %{pkgname_pcmk_libs} -p /sbin/ldconfig
%postun libs -p /sbin/ldconfig %postun -n %{pkgname_pcmk_libs} -p /sbin/ldconfig
%post cluster-libs -p /sbin/ldconfig %post cluster-libs -p /sbin/ldconfig
%postun cluster-libs -p /sbin/ldconfig %postun cluster-libs -p /sbin/ldconfig
@ -819,7 +864,7 @@ exit 0
%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker %dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker
%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles %dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles
%files libs %files -n %{pkgname_pcmk_libs}
%{_libdir}/libcib.so.* %{_libdir}/libcib.so.*
%{_libdir}/liblrmd.so.* %{_libdir}/liblrmd.so.*
%{_libdir}/libcrmservice.so.* %{_libdir}/libcrmservice.so.*
@ -874,7 +919,7 @@ exit 0
%doc COPYING %doc COPYING
%doc ChangeLog %doc ChangeLog
%files libs-devel %files -n %{pkgname_pcmk_libs}-devel
%{_includedir}/pacemaker %{_includedir}/pacemaker
%{_libdir}/*.so %{_libdir}/*.so
%if %{with coverage} %if %{with coverage}
@ -891,6 +936,7 @@ exit 0
%{_datadir}/pacemaker/*.rng %{_datadir}/pacemaker/*.rng
%{_datadir}/pacemaker/*.xsl %{_datadir}/pacemaker/*.xsl
%{_datadir}/pacemaker/api %{_datadir}/pacemaker/api
%{_datadir}/pkgconfig/pacemaker-schemas.pc
%files nagios-plugins-metadata %files nagios-plugins-metadata
%dir %{_datadir}/pacemaker/nagios/plugins-metadata %dir %{_datadir}/pacemaker/nagios/plugins-metadata
@ -898,6 +944,43 @@ exit 0
%license %{nagios_name}-%{nagios_hash}/COPYING %license %{nagios_name}-%{nagios_hash}/COPYING
%changelog %changelog
* Wed Nov 27 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.3-3
- Rebase on Pacemaker-2.0.3 final release
- Resolves: rhbz1752538
* Wed Nov 13 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.3-2
- Rebase on Pacemaker-2.0.3-rc3
- Resolves: rhbz1752538
* Thu Oct 31 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.3-1
- Rebase on Pacemaker-2.0.3-rc2
- Parse crm_mon --fence-history option correctly
- Put timeout on controller waiting for scheduler response
- Offer Pacemaker Remote option for bind address
- Calculate cluster recheck interval dynamically
- Clarify crm_resource help text
- Reduce system calls after forking a child process
- Resolves: rhbz1699978
- Resolves: rhbz1725236
- Resolves: rhbz1743377
- Resolves: rhbz1747553
- Resolves: rhbz1748805
- Resolves: rhbz1752538
- Resolves: rhbz1762025
* Mon Aug 26 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.2-3
- Make pacemaker-cli require tar and bzip2
- Resolves: rhbz#1741580
* Fri Jun 21 2019 Klaus Wenninger <kwenning@redhat.com> - 2.0.2-2
- Synchronize fence-history on fenced-restart
- Cleanup leftover pending-fence-actions when fenced is restarted
- Improve fencing of remote-nodes
- Resolves: rhbz#1708380
- Resolves: rhbz#1708378
- Resolves: rhbz#1721198
- Resolves: rhbz#1695737
* Thu Jun 6 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.2-1 * Thu Jun 6 2019 Ken Gaillot <kgaillot@redhat.com> - 2.0.2-1
- Add stonith_admin option to display XML output - Add stonith_admin option to display XML output
- Add new crm_rule tool to check date/time rules - Add new crm_rule tool to check date/time rules