pacemaker/SOURCES/011-cts.patch

From 4e190ebc5460563bae2586b28afb0415f2eb3d1a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 1 Jul 2020 20:38:16 -0500
Subject: [PATCH 1/4] Test: CTS: libqb shared memory creates directories now
... so use "rm -rf" instead of "rm -f"
---
cts/CTS.py.in | 2 +-
cts/CTSaudits.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
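
(Note, not part of the patch: newer libqb keeps its shared-memory state in
directories under /dev/shm rather than plain files, so a recursive remove is
needed. A rough Python sketch of the cleanup the tests run over rsh; the
helper name and paths are illustrative only.)

    import glob, os, shutil

    def clean_qb_shm(shm_dir="/dev/shm"):
        # "rm -rf" equivalent: newer libqb creates directories for its shared
        # memory, which the old "rm -f" (a plain unlink) could not remove.
        for pattern in ("qb-*", "fdata-*"):   # fdata-* is the old name
            for path in glob.glob(os.path.join(shm_dir, pattern)):
                if os.path.isdir(path):
                    shutil.rmtree(path, ignore_errors=True)
                else:
                    try:
                        os.remove(path)
                    except OSError:
                        pass
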
diff --git a/cts/CTS.py.in b/cts/CTS.py.in
index c418318..091bb1f 100644
--- a/cts/CTS.py.in
+++ b/cts/CTS.py.in
@@ -546,7 +546,7 @@ class ClusterManager(UserDict):
if self.rsh(node, self.templates["StopCmd"]) == 0:
# Make sure we can continue even if corosync leaks
# fdata-* is the old name
- #self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*")
+ #self.rsh(node, "rm -rf /dev/shm/qb-* /dev/shm/fdata-*")
self.ShouldBeStatus[node] = "down"
self.cluster_stable(self.Env["DeadTime"])
return 1
diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py
index b7e0827..cc82171 100755
--- a/cts/CTSaudits.py
+++ b/cts/CTSaudits.py
@@ -233,7 +233,7 @@ class FileAudit(ClusterAudit):
for line in lsout:
self.CM.debug("ps[%s]: %s" % (node, line))
- self.CM.rsh(node, "rm -f /dev/shm/qb-*")
+ self.CM.rsh(node, "rm -rf /dev/shm/qb-*")
else:
self.CM.debug("Skipping %s" % node)
--
1.8.3.1

From 4316507d50d51c7864d8d34aac1da31a232b9f42 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 2 Jul 2020 16:09:20 -0500
Subject: [PATCH 2/4] Test: CTS: ignore error logged by recent pcs versions
... because it is expected when a node is fenced, and we should already see
pacemaker errors if a node is unexpectedly fenced
---
cts/patterns.py | 4 ++++
1 file changed, 4 insertions(+)
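
(Illustration only, not part of the patch: the new entry is a regular
expression CTS adds to its ignore list; the sample message below is a made-up
pcs log line of the shape the pattern is meant to match.)

    import re

    IGNORE = r"pcs.daemon:No response from: .* request: get_configs, error:"
    sample = ("pcs.daemon:No response from: node2 "
              "request: get_configs, error: timeout")
    print(bool(re.search(IGNORE, sample)))   # True -> line would be ignored
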
diff --git a/cts/patterns.py b/cts/patterns.py
index 96d6471..7eed90c 100644
--- a/cts/patterns.py
+++ b/cts/patterns.py
@@ -21,6 +21,10 @@ class BasePatterns(object):
# Logging bug in some versions of libvirtd
r"libvirtd.*: internal error: Failed to parse PCI config address",
+
+ # pcs can log this when node is fenced, but fencing is OK in some
+ # tests (and we will catch it in pacemaker logs when not OK)
+ r"pcs.daemon:No response from: .* request: get_configs, error:",
]
self.BadNews = []
self.components = {}
--
1.8.3.1

From 598ae0f65bad6ed16978d1ab6e24e8e358e0a1a4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 2 Jul 2020 20:40:00 -0500
Subject: [PATCH 3/4] Low: libcrmcommon: avoid assertion on controller protocol
errors
Previously, after a protocol error, we would set reply to NULL and then try to
call crm_element_value() on it, which would log an assertion.
---
lib/common/ipc_controld.c | 46 ++++++++++++++++++++++------------------------
1 file changed, 22 insertions(+), 24 deletions(-)
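
(Sketch only, not part of the patch: the fix replaces "set reply to NULL and
fall through" with a jump to the common exit, so crm_element_value() is never
called on a NULL reply. A simplified Python analogue of the new control flow;
all names below are hypothetical, not the libcrmcommon API.)

    def call_callback(status, data):
        print("callback:", status, data)

    def dispatch(reply):
        # Each protocol check now bails out to the common callback (the
        # "goto done" path) instead of clearing reply and continuing to use it.
        if reply.get("type") != "response":
            return call_callback("protocol error", None)
        if reply.get("reference") is None:
            return call_callback("protocol error", None)
        if reply.get("task") is None:
            return call_callback("protocol error", None)
        return call_callback("ok", {"feature_set": reply.get("version")})

    dispatch({"type": "response"})   # reports a protocol error, dereferences nothing
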
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 5917cc5..22cb9e0 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -187,53 +187,51 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
crm_debug("Unrecognizable controller message: invalid message type '%s'",
crm_str(value));
status = CRM_EX_PROTOCOL;
- reply = NULL;
+ goto done;
}
if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
crm_debug("Unrecognizable controller message: no reference");
status = CRM_EX_PROTOCOL;
- reply = NULL;
+ goto done;
}
value = crm_element_value(reply, F_CRM_TASK);
if (value == NULL) {
crm_debug("Unrecognizable controller message: no command name");
status = CRM_EX_PROTOCOL;
- reply = NULL;
+ goto done;
}
// Parse useful info from reply
- if (reply != NULL) {
- reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
- reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
- msg_data = get_message_xml(reply, F_CRM_DATA);
+ reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
+ reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
+ msg_data = get_message_xml(reply, F_CRM_DATA);
- if (!strcmp(value, CRM_OP_REPROBE)) {
- reply_data.reply_type = pcmk_controld_reply_reprobe;
+ if (!strcmp(value, CRM_OP_REPROBE)) {
+ reply_data.reply_type = pcmk_controld_reply_reprobe;
- } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
- set_node_info_data(&reply_data, msg_data);
+ } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
+ set_node_info_data(&reply_data, msg_data);
- } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
- reply_data.reply_type = pcmk_controld_reply_resource;
- reply_data.data.resource.node_state = msg_data;
+ } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
+ reply_data.reply_type = pcmk_controld_reply_resource;
+ reply_data.data.resource.node_state = msg_data;
- } else if (!strcmp(value, CRM_OP_PING)) {
- set_ping_data(&reply_data, msg_data);
+ } else if (!strcmp(value, CRM_OP_PING)) {
+ set_ping_data(&reply_data, msg_data);
- } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
- set_nodes_data(&reply_data, msg_data);
+ } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
+ set_nodes_data(&reply_data, msg_data);
- } else {
- crm_debug("Unrecognizable controller message: unknown command '%s'",
- value);
- status = CRM_EX_PROTOCOL;
- reply = NULL;
- }
+ } else {
+ crm_debug("Unrecognizable controller message: unknown command '%s'",
+ value);
+ status = CRM_EX_PROTOCOL;
}
+done:
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
// Free any reply data that was allocated
--
1.8.3.1

From 5ae4101b60f8c0cd96eb2097a65a59aaa1750d73 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 17 Jul 2020 17:20:23 -0500
Subject: [PATCH 4/4] Log: fencer: don't log assertion if unable to create full
request reply
Previously, we would log an assertion and a warning if asked to create a reply
to a NULL request. However, there is a possible sequence in which this can happen:
- Some nodes are up and some down at cluster start-up
- One node is elected DC and schedules fencing of the down nodes
- Fencing is initiated for one of the down nodes
- One of the other down nodes comes up and is elected DC
- The fencing result comes back and all peers (including new DC) are notified
- New DC tries to create a notification for its client (the controller)
but doesn't know anything about the initial request
For now, just log a warning and drop the assertion. Longer term, maybe we
should synchronize in-flight request information when a fencer joins the
process group.
---
daemons/fenced/fenced_commands.c | 55 +++++++++++++++++++++++-----------------
1 file changed, 32 insertions(+), 23 deletions(-)
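
(Sketch only, not part of the patch: the CRM_CHECK() assertion becomes an
explicit branch; with no matching request the fencer now just warns and
returns a bare reply, otherwise it copies the usual request attributes. A
compressed Python analogue; the names are hypothetical, not the fenced code.)

    import logging

    REQUEST_FIELDS = ("operation", "call_id", "client_id",
                      "client_name", "remote_op_id", "call_options")

    def construct_reply(request, output, data, rc):
        reply = {"output": output, "rc": rc}
        if request is None:
            # Likely a result for fencing initiated before this fencer came up,
            # so there is no request to copy details from: warn, don't assert.
            logging.warning("Missing request information for client "
                            "notifications for operation with result %d", rc)
        else:
            for name in REQUEST_FIELDS:
                reply[name] = request.get(name)
            if data is not None:
                reply["call_data"] = data
        return reply
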
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 05c5437..9c27d61 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2336,22 +2336,8 @@ stonith_fence(xmlNode * msg)
xmlNode *
stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
{
- int lpc = 0;
xmlNode *reply = NULL;
- const char *name = NULL;
- const char *value = NULL;
-
- const char *names[] = {
- F_STONITH_OPERATION,
- F_STONITH_CALLID,
- F_STONITH_CLIENTID,
- F_STONITH_CLIENTNAME,
- F_STONITH_REMOTE_OP_ID,
- F_STONITH_CALLOPTS
- };
-
- crm_trace("Creating a basic reply");
reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __FUNCTION__);
@@ -2359,16 +2345,39 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
crm_xml_add(reply, "st_output", output);
crm_xml_add_int(reply, F_STONITH_RC, rc);
- CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);
- for (lpc = 0; lpc < DIMOF(names); lpc++) {
- name = names[lpc];
- value = crm_element_value(request, name);
- crm_xml_add(reply, name, value);
- }
+ if (request == NULL) {
+ /* Most likely, this is the result of a stonith operation that was
+ * initiated before we came up. Unfortunately that means we lack enough
+ * information to provide clients with a full result.
+ *
+ * @TODO Maybe synchronize this information at start-up?
+ */
+ crm_warn("Missing request information for client notifications for "
+ "operation with result %d (initiated before we came up?)", rc);
- if (data != NULL) {
- crm_trace("Attaching reply output");
- add_message_xml(reply, F_STONITH_CALLDATA, data);
+ } else {
+ const char *name = NULL;
+ const char *value = NULL;
+
+ const char *names[] = {
+ F_STONITH_OPERATION,
+ F_STONITH_CALLID,
+ F_STONITH_CLIENTID,
+ F_STONITH_CLIENTNAME,
+ F_STONITH_REMOTE_OP_ID,
+ F_STONITH_CALLOPTS
+ };
+
+ crm_trace("Creating a result reply with%s reply output (rc=%d)",
+ (data? "" : "out"), rc);
+ for (int lpc = 0; lpc < DIMOF(names); lpc++) {
+ name = names[lpc];
+ value = crm_element_value(request, name);
+ crm_xml_add(reply, name, value);
+ }
+ if (data != NULL) {
+ add_message_xml(reply, F_STONITH_CALLDATA, data);
+ }
}
return reply;
}
--
1.8.3.1