From 4e190ebc5460563bae2586b28afb0415f2eb3d1a Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 1 Jul 2020 20:38:16 -0500
|
|
Subject: [PATCH 1/4] Test: CTS: libqb shared memory creates directories now
|
|
|
|
... so use "rm -rf" instead of "rm -f"
|
|
---
|
|
cts/CTS.py.in | 2 +-
|
|
cts/CTSaudits.py | 2 +-
|
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/cts/CTS.py.in b/cts/CTS.py.in
|
|
index c418318..091bb1f 100644
|
|
--- a/cts/CTS.py.in
|
|
+++ b/cts/CTS.py.in
|
|
@@ -546,7 +546,7 @@ class ClusterManager(UserDict):
|
|
if self.rsh(node, self.templates["StopCmd"]) == 0:
|
|
# Make sure we can continue even if corosync leaks
|
|
# fdata-* is the old name
|
|
- #self.rsh(node, "rm -f /dev/shm/qb-* /dev/shm/fdata-*")
|
|
+ #self.rsh(node, "rm -rf /dev/shm/qb-* /dev/shm/fdata-*")
|
|
self.ShouldBeStatus[node] = "down"
|
|
self.cluster_stable(self.Env["DeadTime"])
|
|
return 1
|
|
diff --git a/cts/CTSaudits.py b/cts/CTSaudits.py
|
|
index b7e0827..cc82171 100755
|
|
--- a/cts/CTSaudits.py
|
|
+++ b/cts/CTSaudits.py
|
|
@@ -233,7 +233,7 @@ class FileAudit(ClusterAudit):
|
|
for line in lsout:
|
|
self.CM.debug("ps[%s]: %s" % (node, line))
|
|
|
|
- self.CM.rsh(node, "rm -f /dev/shm/qb-*")
|
|
+ self.CM.rsh(node, "rm -rf /dev/shm/qb-*")
|
|
|
|
else:
|
|
self.CM.debug("Skipping %s" % node)
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 4316507d50d51c7864d8d34aac1da31a232b9f42 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 2 Jul 2020 16:09:20 -0500
|
|
Subject: [PATCH 2/4] Test: CTS: ignore error logged by recent pcs versions
|
|
|
|
... because it is expected when a node is fenced, and we should already see
|
|
pacemaker errors if a node is unexpectedly fenced
|
|
---
|
|
cts/patterns.py | 4 ++++
|
|
1 file changed, 4 insertions(+)
|
|
|
|
diff --git a/cts/patterns.py b/cts/patterns.py
|
|
index 96d6471..7eed90c 100644
|
|
--- a/cts/patterns.py
|
|
+++ b/cts/patterns.py
|
|
@@ -21,6 +21,10 @@ class BasePatterns(object):
|
|
|
|
# Logging bug in some versions of libvirtd
|
|
r"libvirtd.*: internal error: Failed to parse PCI config address",
|
|
+
|
|
+ # pcs can log this when node is fenced, but fencing is OK in some
|
|
+ # tests (and we will catch it in pacemaker logs when not OK)
|
|
+ r"pcs.daemon:No response from: .* request: get_configs, error:",
|
|
]
|
|
self.BadNews = []
|
|
self.components = {}
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 598ae0f65bad6ed16978d1ab6e24e8e358e0a1a4 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 2 Jul 2020 20:40:00 -0500
|
|
Subject: [PATCH 3/4] Low: libcrmcommon: avoid assertion on controller protocol
|
|
errors
|
|
|
|
Previously, after a protocol error, we would set reply to NULL and then try to
|
|
call crm_element_value() on it, which would log an assertion.
|
|
---
|
|
lib/common/ipc_controld.c | 46 ++++++++++++++++++++++------------------------
|
|
1 file changed, 22 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
|
|
index 5917cc5..22cb9e0 100644
|
|
--- a/lib/common/ipc_controld.c
|
|
+++ b/lib/common/ipc_controld.c
|
|
@@ -187,53 +187,51 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
|
|
crm_debug("Unrecognizable controller message: invalid message type '%s'",
|
|
crm_str(value));
|
|
status = CRM_EX_PROTOCOL;
|
|
- reply = NULL;
|
|
+ goto done;
|
|
}
|
|
|
|
if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
|
|
crm_debug("Unrecognizable controller message: no reference");
|
|
status = CRM_EX_PROTOCOL;
|
|
- reply = NULL;
|
|
+ goto done;
|
|
}
|
|
|
|
value = crm_element_value(reply, F_CRM_TASK);
|
|
if (value == NULL) {
|
|
crm_debug("Unrecognizable controller message: no command name");
|
|
status = CRM_EX_PROTOCOL;
|
|
- reply = NULL;
|
|
+ goto done;
|
|
}
|
|
|
|
// Parse useful info from reply
|
|
|
|
- if (reply != NULL) {
|
|
- reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
|
|
- reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
|
|
- msg_data = get_message_xml(reply, F_CRM_DATA);
|
|
+ reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION);
|
|
+ reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM);
|
|
+ msg_data = get_message_xml(reply, F_CRM_DATA);
|
|
|
|
- if (!strcmp(value, CRM_OP_REPROBE)) {
|
|
- reply_data.reply_type = pcmk_controld_reply_reprobe;
|
|
+ if (!strcmp(value, CRM_OP_REPROBE)) {
|
|
+ reply_data.reply_type = pcmk_controld_reply_reprobe;
|
|
|
|
- } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
|
|
- set_node_info_data(&reply_data, msg_data);
|
|
+ } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
|
|
+ set_node_info_data(&reply_data, msg_data);
|
|
|
|
- } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
|
|
- reply_data.reply_type = pcmk_controld_reply_resource;
|
|
- reply_data.data.resource.node_state = msg_data;
|
|
+ } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
|
|
+ reply_data.reply_type = pcmk_controld_reply_resource;
|
|
+ reply_data.data.resource.node_state = msg_data;
|
|
|
|
- } else if (!strcmp(value, CRM_OP_PING)) {
|
|
- set_ping_data(&reply_data, msg_data);
|
|
+ } else if (!strcmp(value, CRM_OP_PING)) {
|
|
+ set_ping_data(&reply_data, msg_data);
|
|
|
|
- } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
|
|
- set_nodes_data(&reply_data, msg_data);
|
|
+ } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
|
|
+ set_nodes_data(&reply_data, msg_data);
|
|
|
|
- } else {
|
|
- crm_debug("Unrecognizable controller message: unknown command '%s'",
|
|
- value);
|
|
- status = CRM_EX_PROTOCOL;
|
|
- reply = NULL;
|
|
- }
|
|
+ } else {
|
|
+ crm_debug("Unrecognizable controller message: unknown command '%s'",
|
|
+ value);
|
|
+ status = CRM_EX_PROTOCOL;
|
|
}
|
|
|
|
+done:
|
|
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
|
|
|
|
// Free any reply data that was allocated
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 5ae4101b60f8c0cd96eb2097a65a59aaa1750d73 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 17 Jul 2020 17:20:23 -0500
|
|
Subject: [PATCH 4/4] Log: fencer: don't log assertion if unable to create full
|
|
request reply
|
|
|
|
Previously, we would log an assertion and a warning if asked to create a reply
|
|
to a NULL request. However there is a possible sequence for this to happen:
|
|
|
|
- Some nodes are up and some down at cluster start-up
|
|
- One node is elected DC and schedules fencing of the down nodes
|
|
- Fencing is initiated for one of the down nodes
|
|
- One of the other down nodes comes up and is elected DC
|
|
- The fencing result comes back and all peers (including new DC) are notified
|
|
- New DC tries to create a notification for its client (the controller)
|
|
but doesn't know anything about the initial request
|
|
|
|
For now, just log a warning and drop the assertion. Longer term, maybe we
|
|
should synchronize in-flight request information when a fencer joins the
|
|
process group.
|
|
---
|
|
daemons/fenced/fenced_commands.c | 55 +++++++++++++++++++++++-----------------
|
|
1 file changed, 32 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
|
|
index 05c5437..9c27d61 100644
|
|
--- a/daemons/fenced/fenced_commands.c
|
|
+++ b/daemons/fenced/fenced_commands.c
|
|
@@ -2336,22 +2336,8 @@ stonith_fence(xmlNode * msg)
|
|
xmlNode *
|
|
stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
|
|
{
|
|
- int lpc = 0;
|
|
xmlNode *reply = NULL;
|
|
|
|
- const char *name = NULL;
|
|
- const char *value = NULL;
|
|
-
|
|
- const char *names[] = {
|
|
- F_STONITH_OPERATION,
|
|
- F_STONITH_CALLID,
|
|
- F_STONITH_CLIENTID,
|
|
- F_STONITH_CLIENTNAME,
|
|
- F_STONITH_REMOTE_OP_ID,
|
|
- F_STONITH_CALLOPTS
|
|
- };
|
|
-
|
|
- crm_trace("Creating a basic reply");
|
|
reply = create_xml_node(NULL, T_STONITH_REPLY);
|
|
|
|
crm_xml_add(reply, "st_origin", __FUNCTION__);
|
|
@@ -2359,16 +2345,39 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
|
|
crm_xml_add(reply, "st_output", output);
|
|
crm_xml_add_int(reply, F_STONITH_RC, rc);
|
|
|
|
- CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply);
|
|
- for (lpc = 0; lpc < DIMOF(names); lpc++) {
|
|
- name = names[lpc];
|
|
- value = crm_element_value(request, name);
|
|
- crm_xml_add(reply, name, value);
|
|
- }
|
|
+ if (request == NULL) {
|
|
+ /* Most likely, this is the result of a stonith operation that was
|
|
+ * initiated before we came up. Unfortunately that means we lack enough
|
|
+ * information to provide clients with a full result.
|
|
+ *
|
|
+ * @TODO Maybe synchronize this information at start-up?
|
|
+ */
|
|
+ crm_warn("Missing request information for client notifications for "
|
|
+ "operation with result %d (initiated before we came up?)", rc);
|
|
|
|
- if (data != NULL) {
|
|
- crm_trace("Attaching reply output");
|
|
- add_message_xml(reply, F_STONITH_CALLDATA, data);
|
|
+ } else {
|
|
+ const char *name = NULL;
|
|
+ const char *value = NULL;
|
|
+
|
|
+ const char *names[] = {
|
|
+ F_STONITH_OPERATION,
|
|
+ F_STONITH_CALLID,
|
|
+ F_STONITH_CLIENTID,
|
|
+ F_STONITH_CLIENTNAME,
|
|
+ F_STONITH_REMOTE_OP_ID,
|
|
+ F_STONITH_CALLOPTS
|
|
+ };
|
|
+
|
|
+ crm_trace("Creating a result reply with%s reply output (rc=%d)",
|
|
+ (data? "" : "out"), rc);
|
|
+ for (int lpc = 0; lpc < DIMOF(names); lpc++) {
|
|
+ name = names[lpc];
|
|
+ value = crm_element_value(request, name);
|
|
+ crm_xml_add(reply, name, value);
|
|
+ }
|
|
+ if (data != NULL) {
|
|
+ add_message_xml(reply, F_STONITH_CALLDATA, data);
|
|
+ }
|
|
}
|
|
return reply;
|
|
}
|
|
--
|
|
1.8.3.1
|
|
|