diff --git a/.gitignore b/.gitignore index e31915e..2a1c896 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -SOURCES/pacemaker-a3f44794f.tar.gz +SOURCES/pacemaker-6fdc9deea.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index f5f737d..d1dbf02 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -b16198db5f86857ba8bc0ebd04fd386da360478a SOURCES/pacemaker-a3f44794f.tar.gz +fbf71fb3fb42c76f9f1e98497505eb8521cab55e SOURCES/pacemaker-6fdc9deea.tar.gz diff --git a/SOURCES/001-remote-start-state.patch b/SOURCES/001-remote-start-state.patch new file mode 100644 index 0000000..e66bf16 --- /dev/null +++ b/SOURCES/001-remote-start-state.patch @@ -0,0 +1,402 @@ +From cf53f523e691295879cd75cff1a86bc15664fa51 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 09:59:13 -0400 +Subject: [PATCH 1/7] Feature: daemons: Add start state to LRMD handshake XML + +This gets read out of /etc/sysconfig/pacemaker and set into the +environment. The remote node executor will then add that to the XML +that it sends to the controller upon startup. 
+ +Ref T183 +--- + daemons/execd/execd_commands.c | 5 +++++ + include/crm_internal.h | 1 + + 2 files changed, 6 insertions(+) + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index fa2761e..9a783a5 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -1474,6 +1474,7 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + int rc = pcmk_ok; + time_t now = time(NULL); + const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); ++ const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); + + if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { + crm_err("Cluster API version must be greater than or equal to %s, not %s", +@@ -1503,6 +1504,10 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); + crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time); + ++ if (start_state) { ++ crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state); ++ } ++ + return rc; + } + +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 5f6531f..771bd26 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -84,6 +84,7 @@ + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_MODE "mode" ++#define PCMK__XA_NODE_START_STATE "node_start_state" + #define PCMK__XA_TASK "task" + #define PCMK__XA_UPTIME "uptime" + #define PCMK__XA_CONN_HOST "connection_host" +-- +2.31.1 + +From c950291742711b5c4c8986adc8e938fe6fef861c Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:04:32 -0400 +Subject: [PATCH 2/7] Feature: liblrmd: Save a remote node's requested start + state + +Ref T183 +--- + include/crm/common/ipc_internal.h | 1 + + lib/lrmd/lrmd_client.c | 7 +++++++ + 2 files changed, 8 insertions(+) + +diff --git a/include/crm/common/ipc_internal.h 
b/include/crm/common/ipc_internal.h +index 5099dda..d203924 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -112,6 +112,7 @@ struct pcmk__remote_s { + int tcp_socket; + mainloop_io_t *source; + time_t uptime; ++ char *start_state; + + /* CIB-only */ + char *token; +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index c565728..4239105 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -588,7 +588,9 @@ lrmd_tls_connection_destroy(gpointer userdata) + } + + free(native->remote->buffer); ++ free(native->remote->start_state); + native->remote->buffer = NULL; ++ native->remote->start_state = NULL; + native->source = 0; + native->sock = 0; + native->psk_cred_c = NULL; +@@ -980,6 +982,7 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) + const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); + const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); + const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); ++ const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE); + long long uptime = -1; + + crm_element_value_int(reply, F_LRMD_RC, &rc); +@@ -992,6 +995,10 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) + crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime); + native->remote->uptime = uptime; + ++ if (start_state) { ++ native->remote->start_state = strdup(start_state); ++ } ++ + if (rc == -EPROTO) { + crm_err("Executor protocol version mismatch between client (%s) and server (%s)", + LRMD_PROTOCOL_VERSION, version); +-- +2.31.1 + +From 7302014c7b7296be31b1f542b3f107d55b1fb2a0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:05:13 -0400 +Subject: [PATCH 3/7] Feature: liblrmd: Add lrmd__node_start_state. + +This function is used to get the start state out of an lrmd_private_t +structure. 
+ +Ref T183 +--- + include/crm/lrmd_internal.h | 1 + + lib/lrmd/lrmd_client.c | 12 ++++++++++++ + 2 files changed, 13 insertions(+) + +diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h +index 5810554..d1cd25d 100644 +--- a/include/crm/lrmd_internal.h ++++ b/include/crm/lrmd_internal.h +@@ -47,6 +47,7 @@ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, + void lrmd__reset_result(lrmd_event_data_t *event); + + time_t lrmd__uptime(lrmd_t *lrmd); ++const char *lrmd__node_start_state(lrmd_t *lrmd); + + /* Shared functions for IPC proxy back end */ + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 4239105..82434b9 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -2538,3 +2538,15 @@ lrmd__uptime(lrmd_t *lrmd) + return native->remote->uptime; + } + } ++ ++const char * ++lrmd__node_start_state(lrmd_t *lrmd) ++{ ++ lrmd_private_t *native = lrmd->lrmd_private; ++ ++ if (native->remote == NULL) { ++ return NULL; ++ } else { ++ return native->remote->start_state; ++ } ++} +-- +2.31.1 + +From e5e4d43f847da0930bae12f63c7e9d9c44c07cdf Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:07:58 -0400 +Subject: [PATCH 4/7] Refactor: controller: Make set_join_state a public + function. + +This already does all the work of setting a node's start state. It just +needs to be made public and given arguments for what node to set instead +of reading globals. 
+ +Ref T183 +--- + daemons/controld/controld_join_client.c | 20 ++++++++++---------- + daemons/controld/pacemaker-controld.h | 3 +++ + 2 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index da6a9d6..07e2a27 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -195,32 +195,31 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + free_xml(generation); + } + +-static void +-set_join_state(const char * start_state) ++void ++set_join_state(const char *start_state, const char *node_name, const char *node_uuid) + { + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +- "environment", controld_globals.our_nodename, start_state); ++ "environment", node_name, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, +- XML_CIB_TAG_NODES, controld_globals.our_uuid, ++ XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", "on", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +- "environment", controld_globals.our_nodename, start_state); ++ "environment", node_name, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, +- XML_CIB_TAG_NODES, controld_globals.our_uuid, ++ XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", "off", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { +- crm_debug("Not forcing a starting state on node %s", +- controld_globals.our_nodename); ++ crm_debug("Not forcing a starting state on node %s", node_name); + + } else { + crm_warn("Unrecognized start state '%s', using 'default' (%s)", +- start_state, controld_globals.our_nodename); ++ 
start_state, node_name); + } + } + +@@ -335,7 +334,8 @@ do_cl_join_finalize_respond(long long action, + + first_join = FALSE; + if (start_state) { +- set_join_state(start_state); ++ set_join_state(start_state, controld_globals.our_nodename, ++ controld_globals.our_uuid); + } + } + +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index 1484a00..d8c2ddd 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -36,4 +36,7 @@ void controld_remove_voter(const char *uname); + void controld_election_fini(void); + void controld_stop_current_election_timeout(void); + ++void set_join_state(const char *start_state, const char *node_name, ++ const char *node_uuid); ++ + #endif +-- +2.31.1 + +From 63d069adb344bba2c982013226f87dfd95afaff3 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 13:38:03 -0400 +Subject: [PATCH 5/7] Refactor: controller: set_join_state needs to take a + remote parameter. + +Without this parameter, we won't know what to pass to as node_type to +cib__update_node_attr. And without that, that function will not know to +update a remote node - it'll try to update a regular node by the same +name, which either doesn't exist or is not what we were hoping would +happen. 
+ +Ref T138 +--- + daemons/controld/controld_join_client.c | 11 +++++++---- + daemons/controld/pacemaker-controld.h | 2 +- + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 07e2a27..799d1b4 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -196,7 +196,8 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + } + + void +-set_join_state(const char *start_state, const char *node_name, const char *node_uuid) ++set_join_state(const char *start_state, const char *node_name, const char *node_uuid, ++ bool remote) + { + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +@@ -204,7 +205,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_ + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, node_uuid, +- NULL, NULL, NULL, "standby", "on", NULL, NULL); ++ NULL, NULL, NULL, "standby", "on", NULL, ++ remote ? "remote" : NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +@@ -212,7 +214,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_ + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, node_uuid, +- NULL, NULL, NULL, "standby", "off", NULL, NULL); ++ NULL, NULL, NULL, "standby", "off", NULL, ++ remote ? 
"remote" : NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { + crm_debug("Not forcing a starting state on node %s", node_name); +@@ -335,7 +338,7 @@ do_cl_join_finalize_respond(long long action, + first_join = FALSE; + if (start_state) { + set_join_state(start_state, controld_globals.our_nodename, +- controld_globals.our_uuid); ++ controld_globals.our_uuid, false); + } + } + +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index d8c2ddd..2334cce 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -37,6 +37,6 @@ void controld_election_fini(void); + void controld_stop_current_election_timeout(void); + + void set_join_state(const char *start_state, const char *node_name, +- const char *node_uuid); ++ const char *node_uuid, bool remote); + + #endif +-- +2.31.1 + +From 67274787898355065315f8c06d62458e2c2b0afe Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:09:02 -0400 +Subject: [PATCH 6/7] Feature: controller: When a remote node starts, apply any + start state. + +If we were given a start state in the handshake XML, that is now stored +in the remote node cache's private data. Extract it and set the state +on the node with set_node_state. 
+ +Fixes T183 +--- + daemons/controld/controld_remote_ra.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index f24b755..8ab1e46 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -280,6 +280,7 @@ remote_node_up(const char *node_name) + int call_opt; + xmlNode *update, *state; + crm_node_t *node; ++ lrm_state_t *connection_rsc = NULL; + + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing Pacemaker Remote node %s", node_name); +@@ -301,6 +302,20 @@ remote_node_up(const char *node_name) + purge_remote_node_attrs(call_opt, node); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + ++ /* Apply any start state that we were given from the environment on the ++ * remote node. ++ */ ++ connection_rsc = lrm_state_find(node->uname); ++ ++ if (connection_rsc != NULL) { ++ lrmd_t *lrm = connection_rsc->conn; ++ const char *start_state = lrmd__node_start_state(lrm); ++ ++ if (start_state) { ++ set_join_state(start_state, node->uname, node->uuid, true); ++ } ++ } ++ + /* pacemaker_remote nodes don't participate in the membership layer, + * so cluster nodes don't automatically get notified when they come and go. + * We send a cluster message to the DC, and update the CIB node state entry, +-- +2.31.1 + +From 91cdda7056c9b9254a0d7e7a016b30f788e3e3ff Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:16:30 -0400 +Subject: [PATCH 7/7] Doc: sysconfig: Remote nodes now respect start state. 
+ +Ref T183 +--- + etc/sysconfig/pacemaker.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in +index 3b03ad6..041da71 100644 +--- a/etc/sysconfig/pacemaker.in ++++ b/etc/sysconfig/pacemaker.in +@@ -144,8 +144,7 @@ + # By default, the local host will join the cluster in an online or standby + # state when Pacemaker first starts depending on whether it was previously put + # into standby mode. If this variable is set to "standby" or "online", it will +-# force the local host to join in the specified state. This has no effect on +-# Pacemaker Remote nodes. ++# force the local host to join in the specified state. + # + # Default: PCMK_node_start_state="default" + +-- +2.31.1 + diff --git a/SOURCES/001-sync-points.patch b/SOURCES/001-sync-points.patch deleted file mode 100644 index c034c78..0000000 --- a/SOURCES/001-sync-points.patch +++ /dev/null @@ -1,2429 +0,0 @@ -From de05f6b52c667155d262ceeb541dc1041d079d71 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 11:36:58 -0400 -Subject: [PATCH 01/26] Refactor: tools: Use a uint32_t for attr_options. - ---- - tools/attrd_updater.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index d90567a..b85a281 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -47,7 +47,7 @@ struct { - gchar *attr_node; - gchar *attr_set; - char *attr_value; -- int attr_options; -+ uint32_t attr_options; - gboolean query_all; - gboolean quiet; - } options = { --- -2.31.1 - -From c6637520b474d44553ade52c0dbe9e36e873135f Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 21 Oct 2022 14:31:16 -0400 -Subject: [PATCH 02/26] Refactor: libcrmcommon: Make pcmk__xe_match more - broadly useful. - -If attr_v is NULL, simply return the first node with a matching name. 
---- - lib/common/xml.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/lib/common/xml.c b/lib/common/xml.c -index 036dd87..ac6f46a 100644 ---- a/lib/common/xml.c -+++ b/lib/common/xml.c -@@ -510,7 +510,7 @@ find_xml_node(const xmlNode *root, const char *search_path, gboolean must_find) - * \param[in] parent XML element to search - * \param[in] node_name If not NULL, only match children of this type - * \param[in] attr_n If not NULL, only match children with an attribute -- * of this name and a value of \p attr_v -+ * of this name. - * \param[in] attr_v If \p attr_n and this are not NULL, only match children - * with an attribute named \p attr_n and this value - * -@@ -520,14 +520,16 @@ xmlNode * - pcmk__xe_match(const xmlNode *parent, const char *node_name, - const char *attr_n, const char *attr_v) - { -- /* ensure attr_v specified when attr_n is */ -- CRM_CHECK(attr_n == NULL || attr_v != NULL, return NULL); -+ CRM_CHECK(parent != NULL, return NULL); -+ CRM_CHECK(attr_v == NULL || attr_n != NULL, return NULL); - - for (xmlNode *child = pcmk__xml_first_child(parent); child != NULL; - child = pcmk__xml_next(child)) { - if (pcmk__str_eq(node_name, (const char *) (child->name), - pcmk__str_null_matches) -- && ((attr_n == NULL) || attr_matches(child, attr_n, attr_v))) { -+ && ((attr_n == NULL) || -+ (attr_v == NULL && xmlHasProp(child, (pcmkXmlStr) attr_n)) || -+ (attr_v != NULL && attr_matches(child, attr_n, attr_v)))) { - return child; - } - } --- -2.31.1 - -From dd520579484c6ec091f7fbb550347941302dad0e Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 21 Oct 2022 14:32:46 -0400 -Subject: [PATCH 03/26] Tests: libcrmcommon: Add tests for pcmk__xe_match. 
- ---- - lib/common/tests/xml/Makefile.am | 3 +- - lib/common/tests/xml/pcmk__xe_match_test.c | 105 +++++++++++++++++++++ - 2 files changed, 107 insertions(+), 1 deletion(-) - create mode 100644 lib/common/tests/xml/pcmk__xe_match_test.c - -diff --git a/lib/common/tests/xml/Makefile.am b/lib/common/tests/xml/Makefile.am -index 342ca07..0ccdcc3 100644 ---- a/lib/common/tests/xml/Makefile.am -+++ b/lib/common/tests/xml/Makefile.am -@@ -11,6 +11,7 @@ include $(top_srcdir)/mk/tap.mk - include $(top_srcdir)/mk/unittest.mk - - # Add "_test" to the end of all test program names to simplify .gitignore. --check_PROGRAMS = pcmk__xe_foreach_child_test -+check_PROGRAMS = pcmk__xe_foreach_child_test \ -+ pcmk__xe_match_test - - TESTS = $(check_PROGRAMS) -diff --git a/lib/common/tests/xml/pcmk__xe_match_test.c b/lib/common/tests/xml/pcmk__xe_match_test.c -new file mode 100644 -index 0000000..fd529ba ---- /dev/null -+++ b/lib/common/tests/xml/pcmk__xe_match_test.c -@@ -0,0 +1,105 @@ -+/* -+ * Copyright 2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU Lesser General Public License -+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
-+ */ -+ -+#include -+ -+#include -+#include -+ -+const char *str1 = -+ "\n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ " \n" -+ " \n" -+ " content\n" -+ " \n" -+ ""; -+ -+static void -+bad_input(void **state) { -+ xmlNode *xml = string2xml(str1); -+ -+ assert_null(pcmk__xe_match(NULL, NULL, NULL, NULL)); -+ assert_null(pcmk__xe_match(NULL, NULL, NULL, "attrX")); -+ -+ free_xml(xml); -+} -+ -+static void -+not_found(void **state) { -+ xmlNode *xml = string2xml(str1); -+ -+ /* No node with an attrX attribute */ -+ assert_null(pcmk__xe_match(xml, NULL, "attrX", NULL)); -+ /* No nodeX node */ -+ assert_null(pcmk__xe_match(xml, "nodeX", NULL, NULL)); -+ /* No nodeA node with attrX */ -+ assert_null(pcmk__xe_match(xml, "nodeA", "attrX", NULL)); -+ /* No nodeA node with attrA=XYZ */ -+ assert_null(pcmk__xe_match(xml, "nodeA", "attrA", "XYZ")); -+ -+ free_xml(xml); -+} -+ -+static void -+find_attrB(void **state) { -+ xmlNode *xml = string2xml(str1); -+ xmlNode *result = NULL; -+ -+ /* Find the first node with attrB */ -+ result = pcmk__xe_match(xml, NULL, "attrB", NULL); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "3"); -+ -+ /* Find the first nodeB with attrB */ -+ result = pcmk__xe_match(xml, "nodeB", "attrB", NULL); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "5"); -+ -+ free_xml(xml); -+} -+ -+static void -+find_attrA_matching(void **state) { -+ xmlNode *xml = string2xml(str1); -+ xmlNode *result = NULL; -+ -+ /* Find attrA=456 */ -+ result = pcmk__xe_match(xml, NULL, "attrA", "456"); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "2"); -+ -+ /* Find a nodeB with attrA=123 */ -+ result = pcmk__xe_match(xml, "nodeB", "attrA", "123"); -+ assert_non_null(result); -+ assert_string_equal(crm_element_value(result, "id"), "4"); -+ 
-+ free_xml(xml); -+} -+ -+PCMK__UNIT_TEST(NULL, NULL, -+ cmocka_unit_test(bad_input), -+ cmocka_unit_test(not_found), -+ cmocka_unit_test(find_attrB), -+ cmocka_unit_test(find_attrA_matching)); --- -2.31.1 - -From 03af8498d8aaf21c509cec9b0ec4b78475da41d7 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:22:26 -0400 -Subject: [PATCH 04/26] Feature: libcrmcommon: Add attrd options for specifying - a sync point. - ---- - include/crm/common/attrd_internal.h | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - -diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h -index f7033ad..389be48 100644 ---- a/include/crm/common/attrd_internal.h -+++ b/include/crm/common/attrd_internal.h -@@ -16,13 +16,15 @@ extern "C" { - - // Options for clients to use with functions below - enum pcmk__node_attr_opts { -- pcmk__node_attr_none = 0, -- pcmk__node_attr_remote = (1 << 0), -- pcmk__node_attr_private = (1 << 1), -- pcmk__node_attr_pattern = (1 << 2), -- pcmk__node_attr_value = (1 << 3), -- pcmk__node_attr_delay = (1 << 4), -- pcmk__node_attr_perm = (1 << 5), -+ pcmk__node_attr_none = 0, -+ pcmk__node_attr_remote = (1 << 0), -+ pcmk__node_attr_private = (1 << 1), -+ pcmk__node_attr_pattern = (1 << 2), -+ pcmk__node_attr_value = (1 << 3), -+ pcmk__node_attr_delay = (1 << 4), -+ pcmk__node_attr_perm = (1 << 5), -+ pcmk__node_attr_sync_local = (1 << 6), -+ pcmk__node_attr_sync_cluster = (1 << 7), - }; - - #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ --- -2.31.1 - -From 5c8825293ee21d3823bdcd01b0df9c7d39739940 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:23:09 -0400 -Subject: [PATCH 05/26] Feature: libcrmcommon: Add sync point to IPC request - XML. - -If one of the pcmk__node_attr_sync_* options is provided, add an -attribute to the request XML. This will later be inspected by the -server to determine when to send the reply to the client. 
---- - include/crm/common/options_internal.h | 2 ++ - include/crm_internal.h | 1 + - lib/common/ipc_attrd.c | 6 ++++++ - 3 files changed, 9 insertions(+) - -diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h -index b153c67..f29ba3f 100644 ---- a/include/crm/common/options_internal.h -+++ b/include/crm/common/options_internal.h -@@ -145,9 +145,11 @@ bool pcmk__valid_sbd_timeout(const char *value); - #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" - - // Constants for enumerated values for various options -+#define PCMK__VALUE_CLUSTER "cluster" - #define PCMK__VALUE_CUSTOM "custom" - #define PCMK__VALUE_FENCING "fencing" - #define PCMK__VALUE_GREEN "green" -+#define PCMK__VALUE_LOCAL "local" - #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" - #define PCMK__VALUE_NONE "none" - #define PCMK__VALUE_NOTHING "nothing" -diff --git a/include/crm_internal.h b/include/crm_internal.h -index e6e2e96..08193c3 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -71,6 +71,7 @@ - #define PCMK__XA_ATTR_RESOURCE "attr_resource" - #define PCMK__XA_ATTR_SECTION "attr_section" - #define PCMK__XA_ATTR_SET "attr_set" -+#define PCMK__XA_ATTR_SYNC_POINT "attr_sync_point" - #define PCMK__XA_ATTR_USER "attr_user" - #define PCMK__XA_ATTR_UUID "attr_key" - #define PCMK__XA_ATTR_VALUE "attr_value" -diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c -index f6cfbc4..4606509 100644 ---- a/lib/common/ipc_attrd.c -+++ b/lib/common/ipc_attrd.c -@@ -431,6 +431,12 @@ populate_update_op(xmlNode *op, const char *node, const char *name, const char * - pcmk_is_set(options, pcmk__node_attr_remote)); - crm_xml_add_int(op, PCMK__XA_ATTR_IS_PRIVATE, - pcmk_is_set(options, pcmk__node_attr_private)); -+ -+ if (pcmk_is_set(options, pcmk__node_attr_sync_local)) { -+ crm_xml_add(op, PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_LOCAL); -+ } else if (pcmk_is_set(options, pcmk__node_attr_sync_cluster)) { -+ crm_xml_add(op, 
PCMK__XA_ATTR_SYNC_POINT, PCMK__VALUE_CLUSTER); -+ } - } - - int --- -2.31.1 - -From e2b3fee630caf0846ca8bbffcef4d6d2acfd32a5 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 8 Sep 2022 12:26:28 -0400 -Subject: [PATCH 06/26] Feature: tools: Add --wait= parameter to attrd_updater. - -This command line option is used to specify the sync point to use. For -the moment, it has no effect. ---- - tools/attrd_updater.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index b85a281..c4779a6 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -97,6 +97,22 @@ section_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError - return TRUE; - } - -+static gboolean -+wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { -+ if (pcmk__str_eq(optarg, "no", pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ return TRUE; -+ } else if (pcmk__str_eq(optarg, PCMK__VALUE_LOCAL, pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); -+ return TRUE; -+ } else { -+ g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, -+ "--wait= must be one of 'no', 'local', 'cluster'"); -+ return FALSE; -+ } -+} -+ - #define INDENT " " - - static GOptionEntry required_entries[] = { -@@ -175,6 +191,14 @@ static GOptionEntry addl_entries[] = { - "If this creates a new attribute, never write the attribute to CIB", - NULL }, - -+ { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, -+ "Wait for some event to occur before returning. 
Values are 'no' (wait\n" -+ INDENT "only for the attribute daemon to acknowledge the request) or\n" -+ INDENT "'local' (wait until the change has propagated to where a local\n" -+ INDENT "query will return the request value, or the value set by a\n" -+ INDENT "later request). Default is 'no'.", -+ "UNTIL" }, -+ - { NULL } - }; - --- -2.31.1 - -From 52d51ab41b2f00e72724ab39835b3db86605a96b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:40:13 -0400 -Subject: [PATCH 07/26] Feature: daemons: Add functions for checking a request - for a sync point. - ---- - daemons/attrd/Makefile.am | 1 + - daemons/attrd/attrd_sync.c | 38 +++++++++++++++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 3 +++ - 3 files changed, 42 insertions(+) - create mode 100644 daemons/attrd/attrd_sync.c - -diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am -index 1a3d360..6bb81c4 100644 ---- a/daemons/attrd/Makefile.am -+++ b/daemons/attrd/Makefile.am -@@ -32,6 +32,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ - attrd_elections.c \ - attrd_ipc.c \ - attrd_messages.c \ -+ attrd_sync.c \ - attrd_utils.c \ - pacemaker-attrd.c - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -new file mode 100644 -index 0000000..92759d2 ---- /dev/null -+++ b/daemons/attrd/attrd_sync.c -@@ -0,0 +1,38 @@ -+/* -+ * Copyright 2022 the Pacemaker project contributors -+ * -+ * The version control history for this file may have further details. -+ * -+ * This source code is licensed under the GNU General Public License version 2 -+ * or later (GPLv2+) WITHOUT ANY WARRANTY. 
-+ */ -+ -+#include -+ -+#include -+#include -+ -+#include "pacemaker-attrd.h" -+ -+const char * -+attrd_request_sync_point(xmlNode *xml) -+{ -+ if (xml_has_children(xml)) { -+ xmlNode *child = pcmk__xe_match(xml, XML_ATTR_OP, PCMK__XA_ATTR_SYNC_POINT, NULL); -+ -+ if (child) { -+ return crm_element_value(child, PCMK__XA_ATTR_SYNC_POINT); -+ } else { -+ return NULL; -+ } -+ -+ } else { -+ return crm_element_value(xml, PCMK__XA_ATTR_SYNC_POINT); -+ } -+} -+ -+bool -+attrd_request_has_sync_point(xmlNode *xml) -+{ -+ return attrd_request_sync_point(xml) != NULL; -+} -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 71ce90a..ff850bb 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -182,4 +182,7 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a - void attrd_unregister_handlers(void); - void attrd_handle_request(pcmk__request_t *request); - -+const char *attrd_request_sync_point(xmlNode *xml); -+bool attrd_request_has_sync_point(xmlNode *xml); -+ - #endif /* PACEMAKER_ATTRD__H */ --- -2.31.1 - -From 2e0509a12ee7d4a612133ee65b75245eea7d271d Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:42:04 -0400 -Subject: [PATCH 08/26] Refactor: daemons: Don't ACK update requests that give - a sync point. - -The ACK is the only response from the server for update messages. If -the message specified that it wanted to wait for a sync point, we need -to delay sending that response until the sync point is reached. -Therefore, do not always immediately send the ACK. 
---- - daemons/attrd/attrd_messages.c | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index de4a28a..9e8ae40 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -137,12 +137,21 @@ handle_update_request(pcmk__request_t *request) - attrd_peer_update(peer, request->xml, host, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; -+ - } else { -- /* Because attrd_client_update can be called recursively, we send the ACK -- * here to ensure that the client only ever receives one. -- */ -- attrd_send_ack(request->ipc_client, request->ipc_id, -- request->flags|crm_ipc_client_response); -+ if (!attrd_request_has_sync_point(request->xml)) { -+ /* If the client doesn't want to wait for a sync point, go ahead and send -+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate -+ * sync point is reached. -+ * -+ * In the normal case, attrd_client_update can be called recursively which -+ * makes where to send the ACK tricky. Doing it here ensures the client -+ * only ever receives one. -+ */ -+ attrd_send_ack(request->ipc_client, request->ipc_id, -+ request->flags|crm_ipc_client_response); -+ } -+ - return attrd_client_update(request); - } - } --- -2.31.1 - -From 2a0ff66cdf0085c4c8ab1992ef7e785a4facc8c7 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 14:48:48 -0400 -Subject: [PATCH 09/26] Feature: daemons: Add support for local sync points on - updates. - -In the IPC dispatcher for attrd, add the client to a wait list if its -request specifies a sync point. When the attribute's value is changed -on the local attrd, alert any clients waiting on a local sync point by -then sending the previously delayed ACK. - -Sync points for other requests and the global sync point are not yet -supported. - -Fixes T35. 
---- - daemons/attrd/attrd_corosync.c | 18 +++++ - daemons/attrd/attrd_messages.c | 12 ++- - daemons/attrd/attrd_sync.c | 137 ++++++++++++++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 7 ++ - 4 files changed, 173 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 539e5bf..4337280 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -568,14 +568,32 @@ void - attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, - bool filter) - { -+ bool handle_sync_point = false; -+ - if (xml_has_children(xml)) { - for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; - child = crm_next_same_xml(child)) { - copy_attrs(xml, child); - attrd_peer_update_one(peer, child, filter); -+ -+ if (attrd_request_has_sync_point(child)) { -+ handle_sync_point = true; -+ } - } - - } else { - attrd_peer_update_one(peer, xml, filter); -+ -+ if (attrd_request_has_sync_point(xml)) { -+ handle_sync_point = true; -+ } -+ } -+ -+ /* If the update XML specified that the client wanted to wait for a sync -+ * point, process that now. -+ */ -+ if (handle_sync_point) { -+ crm_debug("Hit local sync point for attribute update"); -+ attrd_ack_waitlist_clients(attrd_sync_point_local, xml); - } - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 9e8ae40..c96700f 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -139,7 +139,17 @@ handle_update_request(pcmk__request_t *request) - return NULL; - - } else { -- if (!attrd_request_has_sync_point(request->xml)) { -+ if (attrd_request_has_sync_point(request->xml)) { -+ /* If this client supplied a sync point it wants to wait for, add it to -+ * the wait list. Clients on this list will not receive an ACK until -+ * their sync point is hit which will result in the client stalled there -+ * until it receives a response. 
-+ * -+ * All other clients will receive the expected response as normal. -+ */ -+ attrd_add_client_to_waitlist(request); -+ -+ } else { - /* If the client doesn't want to wait for a sync point, go ahead and send - * the ACK immediately. Otherwise, we'll send the ACK when the appropriate - * sync point is reached. -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 92759d2..2981bd0 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -14,6 +14,143 @@ - - #include "pacemaker-attrd.h" - -+/* A hash table storing clients that are waiting on a sync point to be reached. -+ * The key is waitlist_client - just a plain int. The obvious key would be -+ * the IPC client's ID, but this is not guaranteed to be unique. A single client -+ * could be waiting on a sync point for multiple attributes at the same time. -+ * -+ * It is not expected that this hash table will ever be especially large. -+ */ -+static GHashTable *waitlist = NULL; -+static int waitlist_client = 0; -+ -+struct waitlist_node { -+ /* What kind of sync point does this node describe? */ -+ enum attrd_sync_point sync_point; -+ -+ /* Information required to construct and send a reply to the client. 
*/ -+ char *client_id; -+ uint32_t ipc_id; -+ uint32_t flags; -+}; -+ -+static void -+next_key(void) -+{ -+ do { -+ waitlist_client++; -+ if (waitlist_client < 0) { -+ waitlist_client = 1; -+ } -+ } while (g_hash_table_contains(waitlist, GINT_TO_POINTER(waitlist_client))); -+} -+ -+static void -+free_waitlist_node(gpointer data) -+{ -+ struct waitlist_node *wl = (struct waitlist_node *) data; -+ -+ free(wl->client_id); -+ free(wl); -+} -+ -+static const char * -+sync_point_str(enum attrd_sync_point sync_point) -+{ -+ if (sync_point == attrd_sync_point_local) { -+ return PCMK__VALUE_LOCAL; -+ } else if (sync_point == attrd_sync_point_cluster) { -+ return PCMK__VALUE_CLUSTER; -+ } else { -+ return "unknown"; -+ } -+} -+ -+void -+attrd_add_client_to_waitlist(pcmk__request_t *request) -+{ -+ const char *sync_point = attrd_request_sync_point(request->xml); -+ struct waitlist_node *wl = NULL; -+ -+ if (sync_point == NULL) { -+ return; -+ } -+ -+ if (waitlist == NULL) { -+ waitlist = pcmk__intkey_table(free_waitlist_node); -+ } -+ -+ wl = calloc(sizeof(struct waitlist_node), 1); -+ -+ CRM_ASSERT(wl != NULL); -+ -+ wl->client_id = strdup(request->ipc_client->id); -+ -+ CRM_ASSERT(wl->client_id); -+ -+ if (pcmk__str_eq(sync_point, PCMK__VALUE_LOCAL, pcmk__str_none)) { -+ wl->sync_point = attrd_sync_point_local; -+ } else if (pcmk__str_eq(sync_point, PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ wl->sync_point = attrd_sync_point_cluster; -+ } else { -+ free_waitlist_node(wl); -+ return; -+ } -+ -+ wl->ipc_id = request->ipc_id; -+ wl->flags = request->flags; -+ -+ crm_debug("Added client %s to waitlist for %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ -+ next_key(); -+ pcmk__intkey_table_insert(waitlist, waitlist_client, wl); -+ -+ /* And then add the key to the request XML so we can uniquely identify -+ * it when it comes time to issue the ACK. 
-+ */ -+ crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); -+} -+ -+void -+attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) -+{ -+ int callid; -+ gpointer value; -+ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ if (crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ crm_warn("Could not get callid from request XML"); -+ return; -+ } -+ -+ value = pcmk__intkey_table_lookup(waitlist, callid); -+ if (value != NULL) { -+ struct waitlist_node *wl = (struct waitlist_node *) value; -+ pcmk__client_t *client = NULL; -+ -+ if (wl->sync_point != sync_point) { -+ return; -+ } -+ -+ crm_debug("Alerting client %s for reached %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ -+ client = pcmk__find_client_by_id(wl->client_id); -+ if (client == NULL) { -+ return; -+ } -+ -+ attrd_send_ack(client, wl->ipc_id, wl->flags | crm_ipc_client_response); -+ -+ /* And then remove the client so it doesn't get alerted again. */ -+ pcmk__intkey_table_remove(waitlist, callid); -+ } -+} -+ - const char * - attrd_request_sync_point(xmlNode *xml) - { -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index ff850bb..9dd8320 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -182,6 +182,13 @@ mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *a - void attrd_unregister_handlers(void); - void attrd_handle_request(pcmk__request_t *request); - -+enum attrd_sync_point { -+ attrd_sync_point_local, -+ attrd_sync_point_cluster, -+}; -+ -+void attrd_add_client_to_waitlist(pcmk__request_t *request); -+void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); - --- -2.31.1 - -From 59caaf1682191a91d6062358b770f8b9457ba3eb Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 Oct 2022 
14:56:58 -0400 -Subject: [PATCH 10/26] Feature: daemons: If a client disconnects, remove it - from the waitlist. - ---- - daemons/attrd/attrd_ipc.c | 5 +++++ - daemons/attrd/attrd_sync.c | 21 +++++++++++++++++++++ - daemons/attrd/pacemaker-attrd.h | 1 + - 3 files changed, 27 insertions(+) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 7e4a1c0..8aa39c2 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -438,8 +438,13 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) - crm_trace("Ignoring request to clean up unknown connection %p", c); - } else { - crm_trace("Cleaning up closed client connection %p", c); -+ -+ /* Remove the client from the sync point waitlist if it's present. */ -+ attrd_remove_client_from_waitlist(client); -+ - pcmk__free_client(client); - } -+ - return FALSE; - } - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 2981bd0..7293318 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -112,6 +112,27 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+void -+attrd_remove_client_from_waitlist(pcmk__client_t *client) -+{ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, waitlist); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ struct waitlist_node *wl = (struct waitlist_node *) value; -+ -+ if (wl->client_id == client->id) { -+ g_hash_table_iter_remove(&iter); -+ } -+ } -+} -+ - void - attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - { -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 9dd8320..b6ecb75 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -189,6 +189,7 @@ enum attrd_sync_point { - - void attrd_add_client_to_waitlist(pcmk__request_t *request); - void 
attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); -+void attrd_remove_client_from_waitlist(pcmk__client_t *client); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); - --- -2.31.1 - -From b28042e1d64b48c96dbd9da1e9ee3ff481bbf620 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 11:00:20 -0400 -Subject: [PATCH 11/26] Feature: daemons: Add support for local sync points on - clearing failures. - -attrd_clear_client_failure just calls attrd_client_update underneath, so -that function will handle all the rest of the sync point functionality -for us. ---- - daemons/attrd/attrd_ipc.c | 2 -- - daemons/attrd/attrd_messages.c | 19 +++++++++++++++++++ - 2 files changed, 19 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 8aa39c2..2e614e8 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -101,8 +101,6 @@ attrd_client_clear_failure(pcmk__request_t *request) - xmlNode *xml = request->xml; - const char *rsc, *op, *interval_spec; - -- attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); -- - if (minimum_protocol_version >= 2) { - /* Propagate to all peers (including ourselves). - * This ends up at attrd_peer_message(). -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index c96700f..3ba14a6 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -42,6 +42,25 @@ handle_clear_failure_request(pcmk__request_t *request) - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } else { -+ if (attrd_request_has_sync_point(request->xml)) { -+ /* If this client supplied a sync point it wants to wait for, add it to -+ * the wait list. Clients on this list will not receive an ACK until -+ * their sync point is hit which will result in the client stalled there -+ * until it receives a response. 
-+ * -+ * All other clients will receive the expected response as normal. -+ */ -+ attrd_add_client_to_waitlist(request); -+ -+ } else { -+ /* If the client doesn't want to wait for a sync point, go ahead and send -+ * the ACK immediately. Otherwise, we'll send the ACK when the appropriate -+ * sync point is reached. -+ */ -+ attrd_send_ack(request->ipc_client, request->ipc_id, -+ request->ipc_flags); -+ } -+ - return attrd_client_clear_failure(request); - } - } --- -2.31.1 - -From 291dc3b91e57f2584bbf88cfbe3a360e0332e814 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 13:17:24 -0400 -Subject: [PATCH 12/26] Refactor: daemons: Free the waitlist on attrd exit. - ---- - daemons/attrd/attrd_sync.c | 11 +++++++++++ - daemons/attrd/attrd_utils.c | 2 ++ - daemons/attrd/pacemaker-attrd.c | 1 + - daemons/attrd/pacemaker-attrd.h | 1 + - 4 files changed, 15 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 7293318..557e49a 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -112,6 +112,17 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+void -+attrd_free_waitlist(void) -+{ -+ if (waitlist == NULL) { -+ return; -+ } -+ -+ g_hash_table_destroy(waitlist); -+ waitlist = NULL; -+} -+ - void - attrd_remove_client_from_waitlist(pcmk__client_t *client) - { -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 6a19009..00b879b 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -93,6 +93,8 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGUSR2); - mainloop_destroy_signal(SIGTRAP); - -+ attrd_free_waitlist(); -+ - if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { - /* If there's no main loop active, just exit. This should be possible - * only if we get SIGTERM in brief windows at start-up and shutdown. 
-diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c -index 2100db4..1336542 100644 ---- a/daemons/attrd/pacemaker-attrd.c -+++ b/daemons/attrd/pacemaker-attrd.c -@@ -300,6 +300,7 @@ main(int argc, char **argv) - attrd_ipc_fini(); - attrd_lrmd_disconnect(); - attrd_cib_disconnect(); -+ attrd_free_waitlist(); - g_hash_table_destroy(attributes); - } - -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index b6ecb75..537bf85 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -52,6 +52,7 @@ void attrd_run_mainloop(void); - - void attrd_set_requesting_shutdown(void); - void attrd_clear_requesting_shutdown(void); -+void attrd_free_waitlist(void); - bool attrd_requesting_shutdown(void); - bool attrd_shutting_down(void); - void attrd_shutdown(int nsig); --- -2.31.1 - -From 7715ce617c520e14687a82e11ff794c93cd7f64a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 10 Oct 2022 13:21:16 -0400 -Subject: [PATCH 13/26] Feature: includes: Bump CRM_FEATURE_SET for local sync - points. - ---- - include/crm/crm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 5710e4b..7c5c602 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -66,7 +66,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.16.1" -+# define CRM_FEATURE_SET "3.16.2" - - /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and - * recipient of a CPG message. This imposes an arbitrary limit on cluster node --- -2.31.1 - -From b9054425a76d03f538cd0b3ae27490b1874eee8a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 28 Oct 2022 14:23:49 -0400 -Subject: [PATCH 14/26] Refactor: daemons: Add comments for previously added - sync point code. 
- ---- - daemons/attrd/attrd_sync.c | 63 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 63 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 557e49a..e9690b5 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -66,6 +66,20 @@ sync_point_str(enum attrd_sync_point sync_point) - } - } - -+/*! -+ * \internal -+ * \brief Add a client to the attrd waitlist -+ * -+ * Typically, a client receives an ACK for its XML IPC request immediately. However, -+ * some clients want to wait until their request has been processed and taken effect. -+ * This is called a sync point. Any client placed on this waitlist will have its -+ * ACK message delayed until either its requested sync point is hit, or until it -+ * times out. -+ * -+ * The XML IPC request must specify the type of sync point it wants to wait for. -+ * -+ * \param[in,out] request The request describing the client to place on the waitlist. -+ */ - void - attrd_add_client_to_waitlist(pcmk__request_t *request) - { -@@ -112,6 +126,11 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - crm_xml_add_int(request->xml, XML_LRM_ATTR_CALLID, waitlist_client); - } - -+/*! -+ * \internal -+ * \brief Free all memory associated with the waitlist. This is most typically -+ * used when attrd shuts down. -+ */ - void - attrd_free_waitlist(void) - { -@@ -123,6 +142,13 @@ attrd_free_waitlist(void) - waitlist = NULL; - } - -+/*! -+ * \internal -+ * \brief Unconditionally remove a client from the waitlist, such as when the client -+ * node disconnects from the cluster -+ * -+ * \param[in] client The client to remove -+ */ - void - attrd_remove_client_from_waitlist(pcmk__client_t *client) - { -@@ -144,6 +170,18 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - } - } - -+/*! 
-+ * \internal -+ * \brief Send an IPC ACK message to all awaiting clients -+ * -+ * This function will search the waitlist for all clients that are currently awaiting -+ * an ACK indicating their attrd operation is complete. Only those clients with a -+ * matching sync point type and callid from their original XML IPC request will be -+ * ACKed. Once they have received an ACK, they will be removed from the waitlist. -+ * -+ * \param[in] sync_point What kind of sync point have we hit? -+ * \param[in] xml The original XML IPC request. -+ */ - void - attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - { -@@ -183,6 +221,23 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - } - } - -+/*! -+ * \internal -+ * \brief Return the sync point attribute for an IPC request -+ * -+ * This function will check both the top-level element of \p xml for a sync -+ * point attribute, as well as all of its \p op children, if any. The latter -+ * is useful for newer versions of attrd that can put multiple IPC requests -+ * into a single message. -+ * -+ * \param[in] xml An XML IPC request -+ * -+ * \note It is assumed that if one child element has a sync point attribute, -+ * all will have a sync point attribute and they will all be the same -+ * sync point. No other configuration is supported. -+ * -+ * \return The sync point attribute of \p xml, or NULL if none. -+ */ - const char * - attrd_request_sync_point(xmlNode *xml) - { -@@ -200,6 +255,14 @@ attrd_request_sync_point(xmlNode *xml) - } - } - -+/*! -+ * \internal -+ * \brief Does an IPC request contain any sync point attribute? 
-+ * -+ * \param[in] xml An XML IPC request -+ * -+ * \return true if there's a sync point attribute, false otherwise -+ */ - bool - attrd_request_has_sync_point(xmlNode *xml) - { --- -2.31.1 - -From 64219fb7075ee58d29f94f077a3b8f94174bb32a Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 12:43:05 -0400 -Subject: [PATCH 15/26] Feature: tools: Add --wait=cluster option to - attrd_updater. - ---- - tools/attrd_updater.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index c4779a6..3cd766d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -106,6 +106,10 @@ wait_cb (const gchar *option_name, const gchar *optarg, gpointer data, GError ** - pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); - pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local); - return TRUE; -+ } else if (pcmk__str_eq(optarg, PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ pcmk__clear_node_attr_flags(options.attr_options, pcmk__node_attr_sync_local | pcmk__node_attr_sync_cluster); -+ pcmk__set_node_attr_flags(options.attr_options, pcmk__node_attr_sync_cluster); -+ return TRUE; - } else { - g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_USAGE, - "--wait= must be one of 'no', 'local', 'cluster'"); -@@ -193,10 +197,12 @@ static GOptionEntry addl_entries[] = { - - { "wait", 'W', 0, G_OPTION_ARG_CALLBACK, wait_cb, - "Wait for some event to occur before returning. Values are 'no' (wait\n" -- INDENT "only for the attribute daemon to acknowledge the request) or\n" -+ INDENT "only for the attribute daemon to acknowledge the request),\n" - INDENT "'local' (wait until the change has propagated to where a local\n" - INDENT "query will return the request value, or the value set by a\n" -- INDENT "later request). 
Default is 'no'.", -+ INDENT "later request), or 'cluster' (wait until the change has propagated\n" -+ INDENT "to where a query anywhere on the cluster will return the requested\n" -+ INDENT "value, or the value set by a later request). Default is 'no'.", - "UNTIL" }, - - { NULL } --- -2.31.1 - -From 1bc5511fadf6ad670508bd3a2a55129bde16f774 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 16 Sep 2022 14:55:06 -0400 -Subject: [PATCH 16/26] Refactor: daemons: Add a confirm= attribute to attrd - messages. - -This allows informing the originator of a message that the message has -been received and processed. As yet, there is no mechanism for handling -and returning the confirmation, only for requesting it. ---- - daemons/attrd/attrd_corosync.c | 6 +++--- - daemons/attrd/attrd_ipc.c | 26 +++++++++++++++++++++----- - daemons/attrd/attrd_messages.c | 11 +++++++++-- - daemons/attrd/pacemaker-attrd.h | 7 ++++--- - include/crm_internal.h | 1 + - 5 files changed, 38 insertions(+), 13 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 4337280..e86ca07 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -124,7 +124,7 @@ broadcast_local_value(const attribute_t *a) - - crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); - attrd_add_value_xml(sync, a, v, false); -- attrd_send_message(NULL, sync); -+ attrd_send_message(NULL, sync, false); - free_xml(sync); - return v; - } -@@ -387,7 +387,7 @@ broadcast_unseen_local_values(void) - - if (sync != NULL) { - crm_debug("Broadcasting local-only values"); -- attrd_send_message(NULL, sync); -+ attrd_send_message(NULL, sync, false); - free_xml(sync); - } - } -@@ -539,7 +539,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) - } - - crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); -- attrd_send_message(peer, sync); -+ attrd_send_message(peer, sync, false); - free_xml(sync); - } - -diff --git a/daemons/attrd/attrd_ipc.c 
b/daemons/attrd/attrd_ipc.c -index 2e614e8..0fc5e93 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -105,7 +105,7 @@ attrd_client_clear_failure(pcmk__request_t *request) - /* Propagate to all peers (including ourselves). - * This ends up at attrd_peer_message(). - */ -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } -@@ -184,7 +184,7 @@ attrd_client_peer_remove(pcmk__request_t *request) - if (host) { - crm_info("Client %s is requesting all values for %s be removed", - pcmk__client_name(request->ipc_client), host); -- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ - free(host_alloc); - } else { - crm_info("Ignoring request by client %s to remove all peer values without specifying peer", -@@ -314,7 +314,7 @@ attrd_client_update(pcmk__request_t *request) - } - } - -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - - } else { -@@ -358,7 +358,7 @@ attrd_client_update(pcmk__request_t *request) - if (status == 0) { - crm_trace("Matched %s with %s", attr, regex); - crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); -- attrd_send_message(NULL, xml); -+ attrd_send_message(NULL, xml, false); - } - } - -@@ -388,7 +388,23 @@ attrd_client_update(pcmk__request_t *request) - crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), - value, (attrd_election_won()? " (writer)" : "")); - -- attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ -+ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ /* The client is waiting on the cluster-wide sync point. 
In this case, -+ * the response ACK is not sent until this attrd broadcasts the update -+ * and receives its own confirmation back from all peers. -+ */ -+ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -+ -+ } else { -+ /* The client is either waiting on the local sync point or was not -+ * waiting on any sync point at all. For the local sync point, the -+ * response ACK is sent in attrd_peer_update. For clients not -+ * waiting on any sync point, the response ACK is sent in -+ * handle_update_request immediately before this function was called. -+ */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -+ } -+ - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 3ba14a6..78df0d0 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -279,16 +279,23 @@ attrd_broadcast_protocol(void) - crm_debug("Broadcasting attrd protocol version %s for node %s", - ATTRD_PROTOCOL_VERSION, attrd_cluster->uname); - -- attrd_send_message(NULL, attrd_op); /* ends up at attrd_peer_message() */ -+ attrd_send_message(NULL, attrd_op, false); /* ends up at attrd_peer_message() */ - - free_xml(attrd_op); - } - - gboolean --attrd_send_message(crm_node_t * node, xmlNode * data) -+attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - { - crm_xml_add(data, F_TYPE, T_ATTRD); - crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); -+ -+ /* Request a confirmation from the destination peer node (which could -+ * be all if node is NULL) that the message has been received and -+ * acted upon. 
-+ */ -+ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ - attrd_xml_add_writer(data); - return send_cluster_message(node, crm_msg_attrd, data, TRUE); - } -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 537bf85..25f7c8a 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -39,10 +39,11 @@ - * PCMK__ATTRD_CMD_UPDATE_DELAY - * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE - * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes -- * 4 2.2.0 Multiple attributes can be updated in a single IPC -+ * 4 2.1.5 Multiple attributes can be updated in a single IPC - * message -+ * 5 2.1.5 Peers can request confirmation of a sent message - */ --#define ATTRD_PROTOCOL_VERSION "4" -+#define ATTRD_PROTOCOL_VERSION "5" - - #define attrd_send_ack(client, id, flags) \ - pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) -@@ -162,7 +163,7 @@ xmlNode *attrd_client_clear_failure(pcmk__request_t *request); - xmlNode *attrd_client_update(pcmk__request_t *request); - xmlNode *attrd_client_refresh(pcmk__request_t *request); - xmlNode *attrd_client_query(pcmk__request_t *request); --gboolean attrd_send_message(crm_node_t * node, xmlNode * data); -+gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); - - xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, - const attribute_value_t *v, bool force_write); -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 08193c3..63a1726 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -79,6 +79,7 @@ - #define PCMK__XA_ATTR_WRITER "attr_writer" - #define PCMK__XA_CONFIG_ERRORS "config-errors" - #define PCMK__XA_CONFIG_WARNINGS "config-warnings" -+#define PCMK__XA_CONFIRM "confirm" - #define PCMK__XA_GRAPH_ERRORS "graph-errors" - #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" - #define PCMK__XA_MODE "mode" --- -2.31.1 - -From 
6f389038fc0b11f6291c022c99f188666c65f530 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 14:44:42 -0400 -Subject: [PATCH 17/26] Feature: daemons: Respond to received attrd - confirmation requests. - -On the receiving peer side, if the XML request contains confirm="true", -construct a confirmation message after handling the request completes -and send it back to the originating peer. - -On the originating peer side, add a skeleton handler for confirmation -messages. This does nothing at the moment except log it. ---- - daemons/attrd/attrd_corosync.c | 38 ++++++++++++++++++++++++++++++++++ - daemons/attrd/attrd_messages.c | 13 ++++++++++++ - include/crm_internal.h | 1 + - 3 files changed, 52 insertions(+) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index e86ca07..1245d9c 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -25,6 +25,19 @@ - - extern crm_exit_t attrd_exit_status; - -+static xmlNode * -+attrd_confirmation(int callid) -+{ -+ xmlNode *node = create_xml_node(NULL, __func__); -+ -+ crm_xml_add(node, F_TYPE, T_ATTRD); -+ crm_xml_add(node, F_ORIG, get_local_node_name()); -+ crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); -+ crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); -+ -+ return node; -+} -+ - static void - attrd_peer_message(crm_node_t *peer, xmlNode *xml) - { -@@ -57,6 +70,31 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - CRM_CHECK(request.op != NULL, return); - - attrd_handle_request(&request); -+ -+ /* Having finished handling the request, check to see if the originating -+ * peer requested confirmation. If so, send that confirmation back now. -+ */ -+ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { -+ int callid = 0; -+ xmlNode *reply = NULL; -+ -+ /* Add the confirmation ID for the message we are confirming to the -+ * response so the originating peer knows what they're a confirmation -+ * for. 
-+ */ -+ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); -+ reply = attrd_confirmation(callid); -+ -+ /* And then send the confirmation back to the originating peer. This -+ * ends up right back in this same function (attrd_peer_message) on the -+ * peer where it will have to do something with a PCMK__XA_CONFIRM type -+ * message. -+ */ -+ crm_debug("Sending %s a confirmation", peer->uname); -+ attrd_send_message(peer, reply, false); -+ free_xml(reply); -+ } -+ - pcmk__reset_request(&request); - } - } -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 78df0d0..9c792b2 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -65,6 +65,18 @@ handle_clear_failure_request(pcmk__request_t *request) - } - } - -+static xmlNode * -+handle_confirm_request(pcmk__request_t *request) -+{ -+ if (request->peer != NULL) { -+ crm_debug("Received confirmation from %s", request->peer); -+ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); -+ return NULL; -+ } else { -+ return handle_unknown_request(request); -+ } -+} -+ - static xmlNode * - handle_flush_request(pcmk__request_t *request) - { -@@ -190,6 +202,7 @@ attrd_register_handlers(void) - { - pcmk__server_command_t handlers[] = { - { PCMK__ATTRD_CMD_CLEAR_FAILURE, handle_clear_failure_request }, -+ { PCMK__ATTRD_CMD_CONFIRM, handle_confirm_request }, - { PCMK__ATTRD_CMD_FLUSH, handle_flush_request }, - { PCMK__ATTRD_CMD_PEER_REMOVE, handle_remove_request }, - { PCMK__ATTRD_CMD_QUERY, handle_query_request }, -diff --git a/include/crm_internal.h b/include/crm_internal.h -index 63a1726..f60e7b4 100644 ---- a/include/crm_internal.h -+++ b/include/crm_internal.h -@@ -108,6 +108,7 @@ - #define PCMK__ATTRD_CMD_SYNC "sync" - #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" - #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" -+#define PCMK__ATTRD_CMD_CONFIRM "confirm" - - #define PCMK__CONTROLD_CMD_NODES "list-nodes" - --- -2.31.1 - -From 
dfb730e9ced9dc75886fda9452c584860573fe30 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 26 Oct 2022 15:58:00 -0400 -Subject: [PATCH 18/26] Feature: daemons: Keep track of #attrd-protocol from - each peer. - -This information can be used in the future when dealing with -cluster-wide sync points to know which peers we are waiting on a reply -from. ---- - daemons/attrd/attrd_corosync.c | 3 +- - daemons/attrd/attrd_utils.c | 60 ++++++++++++++++++++++++++++++--- - daemons/attrd/pacemaker-attrd.h | 4 ++- - 3 files changed, 60 insertions(+), 7 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 1245d9c..6f88ab6 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -268,6 +268,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - // Remove votes from cluster nodes that leave, in case election in progress - if (gone && !is_remote) { - attrd_remove_voter(peer); -+ attrd_remove_peer_protocol_ver(peer->uname); - - // Ensure remote nodes that come up are in the remote node cache - } else if (!gone && is_remote) { -@@ -395,7 +396,7 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) - * version, check to see if it's a new minimum version. - */ - if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { -- attrd_update_minimum_protocol_ver(value); -+ attrd_update_minimum_protocol_ver(peer->uname, value); - } - } - -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 00b879b..421faed 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -29,6 +29,11 @@ static bool requesting_shutdown = false; - static bool shutting_down = false; - static GMainLoop *mloop = NULL; - -+/* A hash table storing information on the protocol version of each peer attrd. -+ * The key is the peer's uname, and the value is the protocol version number. -+ */ -+GHashTable *peer_protocol_vers = NULL; -+ - /*! 
- * \internal - * \brief Set requesting_shutdown state -@@ -94,6 +99,10 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGTRAP); - - attrd_free_waitlist(); -+ if (peer_protocol_vers != NULL) { -+ g_hash_table_destroy(peer_protocol_vers); -+ peer_protocol_vers = NULL; -+ } - - if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { - /* If there's no main loop active, just exit. This should be possible -@@ -273,16 +282,57 @@ attrd_free_attribute(gpointer data) - } - } - -+/*! -+ * \internal -+ * \brief When a peer node leaves the cluster, stop tracking its protocol version. -+ * -+ * \param[in] host The peer node's uname to be removed -+ */ -+void -+attrd_remove_peer_protocol_ver(const char *host) -+{ -+ if (peer_protocol_vers != NULL) { -+ g_hash_table_remove(peer_protocol_vers, host); -+ } -+} -+ -+/*! -+ * \internal -+ * \brief When a peer node broadcasts a message with its protocol version, keep -+ * track of that information. -+ * -+ * We keep track of each peer's protocol version so we know which peers to -+ * expect confirmation messages from when handling cluster-wide sync points. -+ * We additionally keep track of the lowest protocol version supported by all -+ * peers so we know when we can send IPC messages containing more than one -+ * request. 
-+ * -+ * \param[in] host The peer node's uname to be tracked -+ * \param[in] value The peer node's protocol version -+ */ - void --attrd_update_minimum_protocol_ver(const char *value) -+attrd_update_minimum_protocol_ver(const char *host, const char *value) - { - int ver; - -+ if (peer_protocol_vers == NULL) { -+ peer_protocol_vers = pcmk__strkey_table(free, NULL); -+ } -+ - pcmk__scan_min_int(value, &ver, 0); - -- if (ver > 0 && (minimum_protocol_version == -1 || ver < minimum_protocol_version)) { -- minimum_protocol_version = ver; -- crm_trace("Set minimum attrd protocol version to %d", -- minimum_protocol_version); -+ if (ver > 0) { -+ char *host_name = strdup(host); -+ -+ /* Record the peer attrd's protocol version. */ -+ CRM_ASSERT(host_name != NULL); -+ g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); -+ -+ /* If the protocol version is a new minimum, record it as such. */ -+ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { -+ minimum_protocol_version = ver; -+ crm_trace("Set minimum attrd protocol version to %d", -+ minimum_protocol_version); -+ } - } - } -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 25f7c8a..302ef63 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -145,6 +145,7 @@ typedef struct attribute_value_s { - - extern crm_cluster_t *attrd_cluster; - extern GHashTable *attributes; -+extern GHashTable *peer_protocol_vers; - - #define CIB_OP_TIMEOUT_S 120 - -@@ -177,7 +178,8 @@ void attrd_write_attributes(bool all, bool ignore_delay); - void attrd_write_or_elect_attribute(attribute_t *a); - - extern int minimum_protocol_version; --void attrd_update_minimum_protocol_ver(const char *value); -+void attrd_remove_peer_protocol_ver(const char *host); -+void attrd_update_minimum_protocol_ver(const char *host, const char *value); - - mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); - --- -2.31.1 - 
-From 945f0fe51d3bf69c2cb1258b394f2f11b8996525 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 27 Oct 2022 14:42:59 -0400 -Subject: [PATCH 19/26] Feature: daemons: Handle cluster-wide sync points in - attrd. - -When an attrd receives an IPC request to update some value, record the -protocol versions of all peer attrds. Additionally register a function -that will be called when all confirmations are received. - -The originating IPC client (attrd_updater for instance) will sit there -waiting for an ACK until its timeout is hit. - -As each confirmation message comes back to attrd, mark it off the list -of peers we are waiting on. When no more peers are expected, call the -previously registered function. - -For attribute updates, this function just sends an ack back to -attrd_updater. - -Fixes T35 ---- - daemons/attrd/attrd_corosync.c | 1 + - daemons/attrd/attrd_ipc.c | 4 + - daemons/attrd/attrd_messages.c | 10 ++ - daemons/attrd/attrd_sync.c | 260 +++++++++++++++++++++++++++++++- - daemons/attrd/attrd_utils.c | 2 + - daemons/attrd/pacemaker-attrd.h | 8 + - 6 files changed, 281 insertions(+), 4 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 6f88ab6..37701aa 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -269,6 +269,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - if (gone && !is_remote) { - attrd_remove_voter(peer); - attrd_remove_peer_protocol_ver(peer->uname); -+ attrd_do_not_expect_from_peer(peer->uname); - - // Ensure remote nodes that come up are in the remote node cache - } else if (!gone && is_remote) { -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 0fc5e93..c70aa1b 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -393,6 +393,7 @@ attrd_client_update(pcmk__request_t *request) - * the response ACK is not sent until this attrd broadcasts the update - * and receives its
own confirmation back from all peers. - */ -+ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); - attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ - - } else { -@@ -456,6 +457,9 @@ attrd_ipc_closed(qb_ipcs_connection_t *c) - /* Remove the client from the sync point waitlist if it's present. */ - attrd_remove_client_from_waitlist(client); - -+ /* And no longer wait for confirmations from any peers. */ -+ attrd_do_not_wait_for_client(client); -+ - pcmk__free_client(client); - } - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index 9c792b2..f7b9c7c 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -69,7 +69,17 @@ static xmlNode * - handle_confirm_request(pcmk__request_t *request) - { - if (request->peer != NULL) { -+ int callid; -+ - crm_debug("Received confirmation from %s", request->peer); -+ -+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ pcmk__set_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, -+ "Could not get callid from XML"); -+ } else { -+ attrd_handle_confirmation(callid, request->peer); -+ } -+ - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } else { -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index e9690b5..d3d7108 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -34,6 +34,51 @@ struct waitlist_node { - uint32_t flags; - }; - -+/* A hash table storing information on in-progress IPC requests that are awaiting -+ * confirmations. These requests are currently being processed by peer attrds and -+ * we are waiting to receive confirmation messages from each peer indicating that -+ * processing is complete. -+ * -+ * Multiple requests could be waiting on confirmations at the same time. -+ * -+ * The key is the unique callid for the IPC request, and the value is a -+ * confirmation_action struct. 
-+ */ -+static GHashTable *expected_confirmations = NULL; -+ -+/*! -+ * \internal -+ * \brief A structure describing a single IPC request that is awaiting confirmations -+ */ -+struct confirmation_action { -+ /*! -+ * \brief A list of peer attrds that we are waiting to receive confirmation -+ * messages from -+ * -+ * This list is dynamic - as confirmations arrive from peer attrds, they will -+ * be removed from this list. When the list is empty, all peers have processed -+ * the request and the associated confirmation action will be taken. -+ */ -+ GList *respondents; -+ -+ /*! -+ * \brief A function to run when all confirmations have been received -+ */ -+ attrd_confirmation_action_fn fn; -+ -+ /*! -+ * \brief Information required to construct and send a reply to the client -+ */ -+ char *client_id; -+ uint32_t ipc_id; -+ uint32_t flags; -+ -+ /*! -+ * \brief The XML request containing the callid associated with this action -+ */ -+ void *xml; -+}; -+ - static void - next_key(void) - { -@@ -114,12 +159,13 @@ attrd_add_client_to_waitlist(pcmk__request_t *request) - wl->ipc_id = request->ipc_id; - wl->flags = request->flags; - -- crm_debug("Added client %s to waitlist for %s sync point", -- wl->client_id, sync_point_str(wl->sync_point)); -- - next_key(); - pcmk__intkey_table_insert(waitlist, waitlist_client, wl); - -+ crm_trace("Added client %s to waitlist for %s sync point", -+ wl->client_id, sync_point_str(wl->sync_point)); -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); -+ - /* And then add the key to the request XML so we can uniquely identify - * it when it comes time to issue the ACK. 
- */ -@@ -166,6 +212,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - - if (wl->client_id == client->id) { - g_hash_table_iter_remove(&iter); -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } - } - } -@@ -206,7 +253,7 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - return; - } - -- crm_debug("Alerting client %s for reached %s sync point", -+ crm_trace("Alerting client %s for reached %s sync point", - wl->client_id, sync_point_str(wl->sync_point)); - - client = pcmk__find_client_by_id(wl->client_id); -@@ -218,9 +265,28 @@ attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml) - - /* And then remove the client so it doesn't get alerted again. */ - pcmk__intkey_table_remove(waitlist, callid); -+ -+ crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } - } - -+/*! -+ * \internal -+ * \brief Action to take when a cluster sync point is hit for a -+ * PCMK__ATTRD_CMD_UPDATE* message. -+ * -+ * \param[in] xml The request that should be passed along to -+ * attrd_ack_waitlist_clients. This should be the original -+ * IPC request containing the callid for this update message. -+ */ -+int -+attrd_cluster_sync_point_update(xmlNode *xml) -+{ -+ crm_trace("Hit cluster sync point for attribute update"); -+ attrd_ack_waitlist_clients(attrd_sync_point_cluster, xml); -+ return pcmk_rc_ok; -+} -+ - /*! - * \internal - * \brief Return the sync point attribute for an IPC request -@@ -268,3 +334,189 @@ attrd_request_has_sync_point(xmlNode *xml) - { - return attrd_request_sync_point(xml) != NULL; - } -+ -+static void -+free_action(gpointer data) -+{ -+ struct confirmation_action *action = (struct confirmation_action *) data; -+ g_list_free_full(action->respondents, free); -+ free_xml(action->xml); -+ free(action->client_id); -+ free(action); -+} -+ -+/*! 
-+ * \internal -+ * \brief When a peer disconnects from the cluster, no longer wait for its confirmation -+ * for any IPC action. If this peer is the last one being waited on, this will -+ * trigger the confirmation action. -+ * -+ * \param[in] host The disconnecting peer attrd's uname -+ */ -+void -+attrd_do_not_expect_from_peer(const char *host) -+{ -+ GList *keys = g_hash_table_get_keys(expected_confirmations); -+ -+ crm_trace("Removing peer %s from expected confirmations", host); -+ -+ for (GList *node = keys; node != NULL; node = node->next) { -+ int callid = *(int *) node->data; -+ attrd_handle_confirmation(callid, host); -+ } -+ -+ g_list_free(keys); -+} -+ -+/*! -+ * \internal -+ * \brief When a client disconnects from the cluster, no longer wait on confirmations -+ * for it. Because the peer attrds may still be processing the original IPC -+ * message, they may still send us confirmations. However, we will take no -+ * action on them. -+ * -+ * \param[in] client The disconnecting client -+ */ -+void -+attrd_do_not_wait_for_client(pcmk__client_t *client) -+{ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, expected_confirmations); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ struct confirmation_action *action = (struct confirmation_action *) value; -+ -+ if (pcmk__str_eq(action->client_id, client->id, pcmk__str_none)) { -+ crm_trace("Removing client %s from expected confirmations", client->id); -+ g_hash_table_iter_remove(&iter); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ break; -+ } -+ } -+} -+ -+/*! -+ * \internal -+ * \brief Register some action to be taken when IPC request confirmations are -+ * received -+ * -+ * When this function is called, a list of all peer attrds that support confirming -+ * requests is generated. 
As confirmations from these peer attrds are received, -+ * they are removed from this list. When the list is empty, the registered action -+ * will be called. -+ * -+ * \note This function should always be called before attrd_send_message is called -+ * to broadcast to the peers to ensure that we know what replies we are -+ * waiting on. Otherwise, it is possible the peer could finish and confirm -+ * before we know to expect it. -+ * -+ * \param[in] request The request that is awaiting confirmations -+ * \param[in] fn A function to be run after all confirmations are received -+ */ -+void -+attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn) -+{ -+ struct confirmation_action *action = NULL; -+ GHashTableIter iter; -+ gpointer host, ver; -+ GList *respondents = NULL; -+ int callid; -+ -+ if (expected_confirmations == NULL) { -+ expected_confirmations = pcmk__intkey_table((GDestroyNotify) free_action); -+ } -+ -+ if (crm_element_value_int(request->xml, XML_LRM_ATTR_CALLID, &callid) == -1) { -+ crm_err("Could not get callid from xml"); -+ return; -+ } -+ -+ if (pcmk__intkey_table_lookup(expected_confirmations, callid)) { -+ crm_err("Already waiting on confirmations for call id %d", callid); -+ return; -+ } -+ -+ g_hash_table_iter_init(&iter, peer_protocol_vers); -+ while (g_hash_table_iter_next(&iter, &host, &ver)) { -+ if (GPOINTER_TO_INT(ver) >= 5) { -+ char *s = strdup((char *) host); -+ -+ CRM_ASSERT(s != NULL); -+ respondents = g_list_prepend(respondents, s); -+ } -+ } -+ -+ action = calloc(1, sizeof(struct confirmation_action)); -+ CRM_ASSERT(action != NULL); -+ -+ action->respondents = respondents; -+ action->fn = fn; -+ action->xml = copy_xml(request->xml); -+ -+ action->client_id = strdup(request->ipc_client->id); -+ CRM_ASSERT(action->client_id != NULL); -+ -+ action->ipc_id = request->ipc_id; -+ action->flags = request->flags; -+ -+ pcmk__intkey_table_insert(expected_confirmations, callid, action); -+ crm_trace("Callid %d 
now waiting on %d confirmations", callid, g_list_length(respondents)); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+} -+ -+void -+attrd_free_confirmations(void) -+{ -+ if (expected_confirmations != NULL) { -+ g_hash_table_destroy(expected_confirmations); -+ expected_confirmations = NULL; -+ } -+} -+ -+/*! -+ * \internal -+ * \brief Process a confirmation message from a peer attrd -+ * -+ * This function is called every time a PCMK__ATTRD_CMD_CONFIRM message is -+ * received from a peer attrd. If this is the last confirmation we are waiting -+ * on for a given operation, the registered action will be called. -+ * -+ * \param[in] callid The unique callid for the XML IPC request -+ * \param[in] host The confirming peer attrd's uname -+ */ -+void -+attrd_handle_confirmation(int callid, const char *host) -+{ -+ struct confirmation_action *action = NULL; -+ GList *node = NULL; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ action = pcmk__intkey_table_lookup(expected_confirmations, callid); -+ if (action == NULL) { -+ return; -+ } -+ -+ node = g_list_find_custom(action->respondents, host, (GCompareFunc) strcasecmp); -+ -+ if (node == NULL) { -+ return; -+ } -+ -+ action->respondents = g_list_remove(action->respondents, node->data); -+ crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(action->respondents)); -+ -+ if (action->respondents == NULL) { -+ action->fn(action->xml); -+ pcmk__intkey_table_remove(expected_confirmations, callid); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ } -+} -diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c -index 421faed..f3a2059 100644 ---- a/daemons/attrd/attrd_utils.c -+++ b/daemons/attrd/attrd_utils.c -@@ -99,6 +99,8 @@ attrd_shutdown(int nsig) - mainloop_destroy_signal(SIGTRAP); - - attrd_free_waitlist(); -+ attrd_free_confirmations(); -+ - if 
(peer_protocol_vers != NULL) { - g_hash_table_destroy(peer_protocol_vers); - peer_protocol_vers = NULL; -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index 302ef63..bcc329d 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -191,8 +191,16 @@ enum attrd_sync_point { - attrd_sync_point_cluster, - }; - -+typedef int (*attrd_confirmation_action_fn)(xmlNode *); -+ - void attrd_add_client_to_waitlist(pcmk__request_t *request); - void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); -+int attrd_cluster_sync_point_update(xmlNode *xml); -+void attrd_do_not_expect_from_peer(const char *host); -+void attrd_do_not_wait_for_client(pcmk__client_t *client); -+void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); -+void attrd_free_confirmations(void); -+void attrd_handle_confirmation(int callid, const char *host); - void attrd_remove_client_from_waitlist(pcmk__client_t *client); - const char *attrd_request_sync_point(xmlNode *xml); - bool attrd_request_has_sync_point(xmlNode *xml); --- -2.31.1 - -From 07a032a7eb2f03dce18a7c94c56b8c837dedda15 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 28 Oct 2022 14:54:15 -0400 -Subject: [PATCH 20/26] Refactor: daemons: Add some attrd version checking - macros. - -These are just to make it a little more obvious what is actually being -asked in the code, instead of having magic numbers sprinkled around. ---- - daemons/attrd/attrd_ipc.c | 2 +- - daemons/attrd/attrd_sync.c | 2 +- - daemons/attrd/pacemaker-attrd.h | 3 +++ - 3 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index c70aa1b..16bfff4 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -294,7 +294,7 @@ attrd_client_update(pcmk__request_t *request) - * two ways we can handle that. 
- */ - if (xml_has_children(xml)) { -- if (minimum_protocol_version >= 4) { -+ if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { - /* First, if all peers support a certain protocol version, we can - * just broadcast the big message and they'll handle it. However, - * we also need to apply all the transformations in this function -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index d3d7108..e48f82e 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -444,7 +444,7 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f - - g_hash_table_iter_init(&iter, peer_protocol_vers); - while (g_hash_table_iter_next(&iter, &host, &ver)) { -- if (GPOINTER_TO_INT(ver) >= 5) { -+ if (ATTRD_SUPPORTS_CONFIRMATION(GPOINTER_TO_INT(ver))) { - char *s = strdup((char *) host); - - CRM_ASSERT(s != NULL); -diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h -index bcc329d..83d7c6b 100644 ---- a/daemons/attrd/pacemaker-attrd.h -+++ b/daemons/attrd/pacemaker-attrd.h -@@ -45,6 +45,9 @@ - */ - #define ATTRD_PROTOCOL_VERSION "5" - -+#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) -+#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) -+ - #define attrd_send_ack(client, id, flags) \ - pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) - --- -2.31.1 - -From 811361b96c6f26a1f5eccc54b6e8bf6e6fd003be Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 31 Oct 2022 12:53:22 -0400 -Subject: [PATCH 21/26] Low: attrd: Fix removing clients from the waitlist when - they disconnect. - -The client ID is a string, so it must be compared like a string. 
---- - daemons/attrd/attrd_sync.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index e48f82e..c9b4784 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -210,7 +210,7 @@ attrd_remove_client_from_waitlist(pcmk__client_t *client) - while (g_hash_table_iter_next(&iter, NULL, &value)) { - struct waitlist_node *wl = (struct waitlist_node *) value; - -- if (wl->client_id == client->id) { -+ if (pcmk__str_eq(wl->client_id, client->id, pcmk__str_none)) { - g_hash_table_iter_remove(&iter); - crm_trace("%d clients now on waitlist", g_hash_table_size(waitlist)); - } --- -2.31.1 - -From 4e933ad14456af85c60701410c3b23b4eab03f86 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 1 Nov 2022 12:35:12 -0400 -Subject: [PATCH 22/26] Feature: daemons: Handle an attrd client timing out. - -If the update confirmations do not come back in time, use a main loop -timer to remove the client from the table. ---- - daemons/attrd/attrd_sync.c | 49 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 49 insertions(+) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index c9b4784..9d07796 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -61,6 +61,12 @@ struct confirmation_action { - */ - GList *respondents; - -+ /*! -+ * \brief A timer that will be used to remove the client should it time out -+ * before receiving all confirmations -+ */ -+ mainloop_timer_t *timer; -+ - /*! 
- * \brief A function to run when all confirmations have been received - */ -@@ -340,11 +346,51 @@ free_action(gpointer data) - { - struct confirmation_action *action = (struct confirmation_action *) data; - g_list_free_full(action->respondents, free); -+ mainloop_timer_del(action->timer); - free_xml(action->xml); - free(action->client_id); - free(action); - } - -+/* Remove an IPC request from the expected_confirmations table if the peer attrds -+ * don't respond before the timeout is hit. We set the timeout to 15s. The exact -+ * number isn't critical - we just want to make sure that the table eventually gets -+ * cleared of things that didn't complete. -+ */ -+static gboolean -+confirmation_timeout_cb(gpointer data) -+{ -+ struct confirmation_action *action = (struct confirmation_action *) data; -+ -+ GHashTableIter iter; -+ gpointer value; -+ -+ if (expected_confirmations == NULL) { -+ return G_SOURCE_REMOVE; -+ } -+ -+ g_hash_table_iter_init(&iter, expected_confirmations); -+ -+ while (g_hash_table_iter_next(&iter, NULL, &value)) { -+ if (value == action) { -+ pcmk__client_t *client = pcmk__find_client_by_id(action->client_id); -+ if (client == NULL) { -+ return G_SOURCE_REMOVE; -+ } -+ -+ crm_trace("Timed out waiting for confirmations for client %s", client->id); -+ pcmk__ipc_send_ack(client, action->ipc_id, action->flags | crm_ipc_client_response, -+ "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_TIMEOUT); -+ -+ g_hash_table_iter_remove(&iter); -+ crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); -+ break; -+ } -+ } -+ -+ return G_SOURCE_REMOVE; -+} -+ - /*! 
- * \internal - * \brief When a peer disconnects from the cluster, no longer wait for its confirmation -@@ -465,6 +511,9 @@ attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_f - action->ipc_id = request->ipc_id; - action->flags = request->flags; - -+ action->timer = mainloop_timer_add(NULL, 15000, FALSE, confirmation_timeout_cb, action); -+ mainloop_timer_start(action->timer); -+ - pcmk__intkey_table_insert(expected_confirmations, callid, action); - crm_trace("Callid %d now waiting on %d confirmations", callid, g_list_length(respondents)); - crm_trace("%d requests now in expected confirmations table", g_hash_table_size(expected_confirmations)); --- -2.31.1 - -From 101896383cbe0103c98078e46540c076af08f040 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 2 Nov 2022 14:40:30 -0400 -Subject: [PATCH 23/26] Refactor: Demote a sync point related message to trace. - ---- - daemons/attrd/attrd_corosync.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 37701aa..5cbed7e 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -633,7 +633,7 @@ attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, - * point, process that now. - */ - if (handle_sync_point) { -- crm_debug("Hit local sync point for attribute update"); -+ crm_trace("Hit local sync point for attribute update"); - attrd_ack_waitlist_clients(attrd_sync_point_local, xml); - } - } --- -2.31.1 - -From acd13246d4c2bef7982ca103e34896efcad22348 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 3 Nov 2022 10:29:20 -0400 -Subject: [PATCH 24/26] Low: daemons: Avoid infinite confirm loops in attrd. - -On the sending side, do not add confirm="yes" to a message with -op="confirm". On the receiving side, do not confirm a message with -op="confirm" even if confirm="yes" is set. 
---- - daemons/attrd/attrd_corosync.c | 3 ++- - daemons/attrd/attrd_messages.c | 6 +++++- - 2 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c -index 5cbed7e..88c1ecc 100644 ---- a/daemons/attrd/attrd_corosync.c -+++ b/daemons/attrd/attrd_corosync.c -@@ -74,7 +74,8 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - /* Having finished handling the request, check to see if the originating - * peer requested confirmation. If so, send that confirmation back now. - */ -- if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM)) { -+ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && -+ !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { - int callid = 0; - xmlNode *reply = NULL; - -diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c -index f7b9c7c..184176a 100644 ---- a/daemons/attrd/attrd_messages.c -+++ b/daemons/attrd/attrd_messages.c -@@ -310,6 +310,8 @@ attrd_broadcast_protocol(void) - gboolean - attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - { -+ const char *op = crm_element_value(data, PCMK__XA_TASK); -+ - crm_xml_add(data, F_TYPE, T_ATTRD); - crm_xml_add(data, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); - -@@ -317,7 +319,9 @@ attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm) - * be all if node is NULL) that the message has been received and - * acted upon. - */ -- pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ if (!pcmk__str_eq(op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { -+ pcmk__xe_set_bool_attr(data, PCMK__XA_CONFIRM, confirm); -+ } - - attrd_xml_add_writer(data); - return send_cluster_message(node, crm_msg_attrd, data, TRUE); --- -2.31.1 - -From 115e6c3a0d8db4df3eccf6da1c344168799f890d Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 15 Nov 2022 09:35:28 -0500 -Subject: [PATCH 25/26] Fix: daemons: Check for NULL in - attrd_do_not_expect_from_peer. 
- ---- - daemons/attrd/attrd_sync.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_sync.c b/daemons/attrd/attrd_sync.c -index 9d07796..6936771 100644 ---- a/daemons/attrd/attrd_sync.c -+++ b/daemons/attrd/attrd_sync.c -@@ -402,7 +402,13 @@ confirmation_timeout_cb(gpointer data) - void - attrd_do_not_expect_from_peer(const char *host) - { -- GList *keys = g_hash_table_get_keys(expected_confirmations); -+ GList *keys = NULL; -+ -+ if (expected_confirmations == NULL) { -+ return; -+ } -+ -+ keys = g_hash_table_get_keys(expected_confirmations); - - crm_trace("Removing peer %s from expected confirmations", host); - --- -2.31.1 - -From 05da14f97ccd4f63f53801acc107ad661e5fd0c8 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 16 Nov 2022 17:37:44 -0500 -Subject: [PATCH 26/26] Low: daemons: Support cluster-wide sync points for - multi IPC messages. - -Supporting cluster-wide sync points means attrd_expect_confirmations -needs to be called, and then attrd_send_message needs "true" as a third -argument. This indicates attrd wants confirmations back from all its -peers when they have applied the update. - -We're already doing this at the end of attrd_client_update for -single-update IPC messages, and handling it for multi-update messages is -a simple matter of breaking that code out into a function and making -sure it's called. - -Note that this leaves two other spots where sync points still need to be -dealt with: - -* An update message that uses a regex. See - https://projects.clusterlabs.org/T600 for details. - -* A multi-update IPC message in a cluster where that is not supported. - See https://projects.clusterlabs.org/T601 for details. 
---- - daemons/attrd/attrd_ipc.c | 43 ++++++++++++++++++++++----------------- - 1 file changed, 24 insertions(+), 19 deletions(-) - -diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c -index 16bfff4..8c5660d 100644 ---- a/daemons/attrd/attrd_ipc.c -+++ b/daemons/attrd/attrd_ipc.c -@@ -283,6 +283,28 @@ handle_value_expansion(const char **value, xmlNode *xml, const char *op, - return pcmk_rc_ok; - } - -+static void -+send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) -+{ -+ if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -+ /* The client is waiting on the cluster-wide sync point. In this case, -+ * the response ACK is not sent until this attrd broadcasts the update -+ * and receives its own confirmation back from all peers. -+ */ -+ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); -+ attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -+ -+ } else { -+ /* The client is either waiting on the local sync point or was not -+ * waiting on any sync point at all. For the local sync point, the -+ * response ACK is sent in attrd_peer_update. For clients not -+ * waiting on any sync point, the response ACK is sent in -+ * handle_update_request immediately before this function was called. -+ */ -+ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -+ } -+} -+ - xmlNode * - attrd_client_update(pcmk__request_t *request) - { -@@ -314,7 +336,7 @@ attrd_client_update(pcmk__request_t *request) - } - } - -- attrd_send_message(NULL, xml, false); -+ send_update_msg_to_cluster(request, xml); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - - } else { -@@ -388,24 +410,7 @@ attrd_client_update(pcmk__request_t *request) - crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), - value, (attrd_election_won()? 
" (writer)" : "")); - -- if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { -- /* The client is waiting on the cluster-wide sync point. In this case, -- * the response ACK is not sent until this attrd broadcasts the update -- * and receives its own confirmation back from all peers. -- */ -- attrd_expect_confirmations(request, attrd_cluster_sync_point_update); -- attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ -- -- } else { -- /* The client is either waiting on the local sync point or was not -- * waiting on any sync point at all. For the local sync point, the -- * response ACK is sent in attrd_peer_update. For clients not -- * waiting on any sync point, the response ACK is sent in -- * handle_update_request immediately before this function was called. -- */ -- attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ -- } -- -+ send_update_msg_to_cluster(request, xml); - pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); - return NULL; - } --- -2.31.1 - diff --git a/SOURCES/002-group-colocation-constraint.patch b/SOURCES/002-group-colocation-constraint.patch new file mode 100644 index 0000000..4cd58c0 --- /dev/null +++ b/SOURCES/002-group-colocation-constraint.patch @@ -0,0 +1,2661 @@ +From 6d438daa021eaef4ca41b84009b9d6fc11173826 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 20 Apr 2023 11:01:41 -0500 +Subject: [PATCH 01/17] Refactor: scheduler: drop redundant argument from + pcmk__new_colocation() + +--- + lib/pacemaker/libpacemaker_private.h | 2 +- + lib/pacemaker/pcmk_sched_bundle.c | 5 ++--- + lib/pacemaker/pcmk_sched_colocation.c | 27 +++++++++++---------------- + lib/pacemaker/pcmk_sched_group.c | 3 +-- + lib/pacemaker/pcmk_sched_primitive.c | 3 +-- + 5 files changed, 16 insertions(+), 24 deletions(-) + +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index 192d5a703ff..a6c13220e1d 100644 +--- 
a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -483,7 +483,7 @@ G_GNUC_INTERNAL + void pcmk__new_colocation(const char *id, const char *node_attr, int score, + pe_resource_t *dependent, pe_resource_t *primary, + const char *dependent_role, const char *primary_role, +- bool influence, pe_working_set_t *data_set); ++ bool influence); + + G_GNUC_INTERNAL + void pcmk__block_colocation_dependents(pe_action_t *action, +diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c +index 5682744395a..6024da68fb7 100644 +--- a/lib/pacemaker/pcmk_sched_bundle.c ++++ b/lib/pacemaker/pcmk_sched_bundle.c +@@ -83,7 +83,7 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + pcmk__new_colocation("child-remote-with-docker-remote", NULL, + INFINITY, replica->remote, + container_host->details->remote_rsc, NULL, +- NULL, true, rsc->cluster); ++ NULL, true); + } + + if (replica->remote) { +@@ -252,8 +252,7 @@ pcmk__bundle_internal_constraints(pe_resource_t *rsc) + pe_order_implies_first|pe_order_preserve); + + pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, +- replica->container, NULL, NULL, true, +- rsc->cluster); ++ replica->container, NULL, NULL, true); + } + + if (replica->remote) { +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index eeef4f1ca55..7d41f4d03e5 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -297,13 +297,12 @@ anti_colocation_order(pe_resource_t *first_rsc, int first_role, + * \param[in] dependent_role Current role of \p dependent + * \param[in] primary_role Current role of \p primary + * \param[in] influence Whether colocation constraint has influence +- * \param[in,out] data_set Cluster working set to add constraint to + */ + void + pcmk__new_colocation(const char *id, const char *node_attr, int score, + pe_resource_t *dependent, pe_resource_t *primary, + const char 
*dependent_role, const char *primary_role, +- bool influence, pe_working_set_t *data_set) ++ bool influence) + { + pcmk__colocation_t *new_con = NULL; + +@@ -351,8 +350,8 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + pcmk__add_this_with(&(dependent->rsc_cons), new_con); + pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con); + +- data_set->colocation_constraints = g_list_append(data_set->colocation_constraints, +- new_con); ++ dependent->cluster->colocation_constraints = g_list_append( ++ dependent->cluster->colocation_constraints, new_con); + + if (score <= -INFINITY) { + anti_colocation_order(dependent, new_con->dependent_role, primary, +@@ -433,7 +432,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + pcmk__new_colocation(set_id, NULL, local_score, resource, + with, role, role, + unpack_influence(coloc_id, resource, +- influence_s), data_set); ++ influence_s)); + } + with = resource; + } +@@ -451,7 +450,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + pcmk__new_colocation(set_id, NULL, local_score, last, + resource, role, role, + unpack_influence(coloc_id, last, +- influence_s), data_set); ++ influence_s)); + } + + last = resource; +@@ -484,8 +483,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, + with->id); + pcmk__new_colocation(set_id, NULL, local_score, +- resource, with, role, role, +- influence, data_set); ++ resource, with, role, role, influence); + } + } + } +@@ -535,8 +533,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, +- unpack_influence(id, rsc_1, influence_s), +- data_set); ++ unpack_influence(id, rsc_1, influence_s)); + + } else if (rsc_1 != NULL) { + bool influence = unpack_influence(id, rsc_1, influence_s); +@@ -546,7 +543,7 
@@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, +- role_2, influence, data_set); ++ role_2, influence); + } + + } else if (rsc_2 != NULL) { +@@ -556,8 +553,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, +- unpack_influence(id, rsc_1, influence_s), +- data_set); ++ unpack_influence(id, rsc_1, influence_s)); + } + + } else { +@@ -576,8 +572,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, +- role_1, role_2, influence, +- data_set); ++ role_1, role_2, influence); + } + } + } +@@ -678,7 +673,7 @@ unpack_simple_colocation(xmlNode *xml_obj, const char *id, + + pcmk__new_colocation(id, attr, score_i, dependent, primary, + dependent_role, primary_role, +- unpack_influence(id, dependent, influence_s), data_set); ++ unpack_influence(id, dependent, influence_s)); + } + + // \return Standard Pacemaker return code +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index cb139f7ddf9..c1392e07a4c 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -171,8 +171,7 @@ member_internal_constraints(gpointer data, gpointer user_data) + // Colocate this member with the previous one + pcmk__new_colocation("group:internal_colocation", NULL, INFINITY, + member, member_data->previous_member, NULL, NULL, +- pcmk_is_set(member->flags, pe_rsc_critical), +- member->cluster); ++ pcmk_is_set(member->flags, pe_rsc_critical)); + } + + if (member_data->promotable) { +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index aefbf9aa140..4e3eca3e18a 100644 +--- 
a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -999,8 +999,7 @@ pcmk__primitive_internal_constraints(pe_resource_t *rsc) + score = INFINITY; /* Force them to run on the same host */ + } + pcmk__new_colocation("resource-with-container", NULL, score, rsc, +- rsc->container, NULL, NULL, true, +- rsc->cluster); ++ rsc->container, NULL, NULL, true); + } + } + + +From c6efbe4bc45795f6991b600fc0a70b6a46c10fc3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 11:50:57 -0500 +Subject: [PATCH 02/17] Low: scheduler: improve error-checking when creating + colocations + +--- + lib/pacemaker/pcmk_sched_colocation.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 7d41f4d03e5..d591550fb97 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -306,21 +306,24 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + { + pcmk__colocation_t *new_con = NULL; + +- if (score == 0) { +- crm_trace("Ignoring colocation '%s' because score is 0", id); +- return; +- } ++ CRM_CHECK(id != NULL, return); ++ + if ((dependent == NULL) || (primary == NULL)) { + pcmk__config_err("Ignoring colocation '%s' because resource " + "does not exist", id); + return; + } + +- new_con = calloc(1, sizeof(pcmk__colocation_t)); +- if (new_con == NULL) { ++ if (score == 0) { ++ pe_rsc_trace(dependent, ++ "Ignoring colocation '%s' (%s with %s) because score is 0", ++ id, dependent->id, primary->id); + return; + } + ++ new_con = calloc(1, sizeof(pcmk__colocation_t)); ++ CRM_ASSERT(new_con != NULL); ++ + if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S, + pcmk__str_null_matches|pcmk__str_casei)) { + dependent_role = RSC_ROLE_UNKNOWN_S; +@@ -344,8 +347,9 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + node_attr = CRM_ATTR_UNAME; + } + +- 
pe_rsc_trace(dependent, "%s ==> %s (%s %d)", +- dependent->id, primary->id, node_attr, score); ++ pe_rsc_trace(dependent, "Added colocation %s (%s with %s @%s using %s)", ++ new_con->id, dependent->id, primary->id, ++ pcmk_readable_score(score), node_attr); + + pcmk__add_this_with(&(dependent->rsc_cons), new_con); + pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con); + +From 589403f548459eeddfd5188ba70723ecf9987d2b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 12:19:44 -0500 +Subject: [PATCH 03/17] Refactor: scheduler: use flag group instead of bool for + colocation influence + +... so we can add more flags +--- + include/pcmki/pcmki_scheduler.h | 2 +- + lib/pacemaker/libpacemaker_private.h | 13 +++++- + lib/pacemaker/pcmk_sched_bundle.c | 5 ++- + lib/pacemaker/pcmk_sched_colocation.c | 61 ++++++++++++++------------- + lib/pacemaker/pcmk_sched_group.c | 8 +++- + lib/pacemaker/pcmk_sched_primitive.c | 3 +- + 6 files changed, 55 insertions(+), 37 deletions(-) + +diff --git a/include/pcmki/pcmki_scheduler.h b/include/pcmki/pcmki_scheduler.h +index dde50a57e32..53de7e1f52e 100644 +--- a/include/pcmki/pcmki_scheduler.h ++++ b/include/pcmki/pcmki_scheduler.h +@@ -29,7 +29,7 @@ typedef struct { + int primary_role; // Colocation applies only if primary has this role + + int score; +- bool influence; // Whether dependent influences active primary placement ++ uint32_t flags; // Group of enum pcmk__coloc_flags + } pcmk__colocation_t; + + void pcmk__unpack_constraints(pe_working_set_t *data_set); +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index a6c13220e1d..51de9d3e9a9 100644 +--- a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -16,6 +16,14 @@ + + #include // pe_action_t, pe_node_t, pe_working_set_t + ++// Colocation flags ++enum pcmk__coloc_flags { ++ pcmk__coloc_none = 0U, ++ ++ // Primary is affected even if already active ++ pcmk__coloc_influence = (1U << 0), 
++}; ++ + // Flags to modify the behavior of add_colocated_node_scores() + enum pcmk__coloc_select { + // With no other flags, apply all "with this" colocations +@@ -483,7 +491,7 @@ G_GNUC_INTERNAL + void pcmk__new_colocation(const char *id, const char *node_attr, int score, + pe_resource_t *dependent, pe_resource_t *primary, + const char *dependent_role, const char *primary_role, +- bool influence); ++ uint32_t flags); + + G_GNUC_INTERNAL + void pcmk__block_colocation_dependents(pe_action_t *action, +@@ -530,7 +538,8 @@ pcmk__colocation_has_influence(const pcmk__colocation_t *colocation, + /* The dependent in a colocation influences the primary's location + * if the influence option is true or the primary is not yet active. + */ +- return colocation->influence || (rsc->running_on == NULL); ++ return pcmk_is_set(colocation->flags, pcmk__coloc_influence) ++ || (rsc->running_on == NULL); + } + + +diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c +index 6024da68fb7..ca3c21a9977 100644 +--- a/lib/pacemaker/pcmk_sched_bundle.c ++++ b/lib/pacemaker/pcmk_sched_bundle.c +@@ -83,7 +83,7 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + pcmk__new_colocation("child-remote-with-docker-remote", NULL, + INFINITY, replica->remote, + container_host->details->remote_rsc, NULL, +- NULL, true); ++ NULL, pcmk__coloc_influence); + } + + if (replica->remote) { +@@ -252,7 +252,8 @@ pcmk__bundle_internal_constraints(pe_resource_t *rsc) + pe_order_implies_first|pe_order_preserve); + + pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, +- replica->container, NULL, NULL, true); ++ replica->container, NULL, NULL, ++ pcmk__coloc_influence); + } + + if (replica->remote) { +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index d591550fb97..dbdefadfd10 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -296,13 +296,13 @@ 
anti_colocation_order(pe_resource_t *first_rsc, int first_role, + * \param[in,out] primary Resource to colocate \p dependent with + * \param[in] dependent_role Current role of \p dependent + * \param[in] primary_role Current role of \p primary +- * \param[in] influence Whether colocation constraint has influence ++ * \param[in] flags Group of enum pcmk__coloc_flags + */ + void + pcmk__new_colocation(const char *id, const char *node_attr, int score, + pe_resource_t *dependent, pe_resource_t *primary, + const char *dependent_role, const char *primary_role, +- bool influence) ++ uint32_t flags) + { + pcmk__colocation_t *new_con = NULL; + +@@ -341,7 +341,7 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + new_con->dependent_role = text2role(dependent_role); + new_con->primary_role = text2role(primary_role); + new_con->node_attribute = node_attr; +- new_con->influence = influence; ++ new_con->flags = flags; + + if (node_attr == NULL) { + node_attr = CRM_ATTR_UNAME; +@@ -373,10 +373,11 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + * \param[in] rsc Resource involved in constraint (for default) + * \param[in] influence_s String value of influence option + * +- * \return true if string evaluates true, false if string evaluates false, +- * or value of resource's critical option if string is NULL or invalid ++ * \return pcmk__coloc_influence if string evaluates true, or string is NULL or ++ * invalid and resource's critical option evaluates true, otherwise ++ * pcmk__coloc_none + */ +-static bool ++static uint32_t + unpack_influence(const char *coloc_id, const pe_resource_t *rsc, + const char *influence_s) + { +@@ -388,10 +389,13 @@ unpack_influence(const char *coloc_id, const pe_resource_t *rsc, + XML_COLOC_ATTR_INFLUENCE " (using default)", + coloc_id); + } else { +- return (influence_i != 0); ++ return (influence_i == 0)? 
pcmk__coloc_none : pcmk__coloc_influence; + } + } +- return pcmk_is_set(rsc->flags, pe_rsc_critical); ++ if (pcmk_is_set(rsc->flags, pe_rsc_critical)) { ++ return pcmk__coloc_influence; ++ } ++ return pcmk__coloc_none; + } + + static void +@@ -406,7 +410,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + const char *ordering = crm_element_value(set, "ordering"); + int local_score = score; + bool sequential = false; +- ++ uint32_t flags = pcmk__coloc_none; + const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); + + if (score_s) { +@@ -433,10 +437,9 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); + if (with != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); ++ flags = unpack_influence(coloc_id, resource, influence_s); + pcmk__new_colocation(set_id, NULL, local_score, resource, +- with, role, role, +- unpack_influence(coloc_id, resource, +- influence_s)); ++ with, role, role, flags); + } + with = resource; + } +@@ -451,12 +454,10 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + if (last != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", + last->id, resource->id); ++ flags = unpack_influence(coloc_id, resource, influence_s); + pcmk__new_colocation(set_id, NULL, local_score, last, +- resource, role, role, +- unpack_influence(coloc_id, last, +- influence_s)); ++ resource, role, role, flags); + } +- + last = resource; + } + +@@ -470,11 +471,10 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + xmlNode *xml_rsc_with = NULL; +- bool influence = true; + + EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); +- influence = unpack_influence(coloc_id, resource, influence_s); + ++ flags = unpack_influence(coloc_id, resource, influence_s); + for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF); + 
xml_rsc_with != NULL; + xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { +@@ -487,7 +487,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, + with->id); + pcmk__new_colocation(set_id, NULL, local_score, +- resource, with, role, role, influence); ++ resource, with, role, role, flags); + } + } + } +@@ -506,6 +506,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + + int rc = pcmk_rc_ok; + bool sequential = false; ++ uint32_t flags = pcmk__coloc_none; + + if (score == 0) { + crm_trace("Ignoring colocation '%s' between sets because score is 0", +@@ -536,18 +537,18 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + } + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { ++ flags = unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, +- unpack_influence(id, rsc_1, influence_s)); ++ flags); + + } else if (rsc_1 != NULL) { +- bool influence = unpack_influence(id, rsc_1, influence_s); +- ++ flags = unpack_influence(id, rsc_1, influence_s); + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, +- role_2, influence); ++ role_2, flags); + } + + } else if (rsc_2 != NULL) { +@@ -555,9 +556,9 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); ++ flags = unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, +- role_2, +- unpack_influence(id, rsc_1, influence_s)); ++ role_2, flags); + } + + } else { +@@ -565,18 +566,17 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + xml_rsc != NULL; 
xml_rsc = crm_next_same_xml(xml_rsc)) { + + xmlNode *xml_rsc_2 = NULL; +- bool influence = true; + + EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); +- influence = unpack_influence(id, rsc_1, influence_s); + ++ flags = unpack_influence(id, rsc_1, influence_s); + for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc_2 != NULL; + xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { + + EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, +- role_1, role_2, influence); ++ role_1, role_2, flags); + } + } + } +@@ -587,6 +587,7 @@ unpack_simple_colocation(xmlNode *xml_obj, const char *id, + const char *influence_s, pe_working_set_t *data_set) + { + int score_i = 0; ++ uint32_t flags = pcmk__coloc_none; + + const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); + const char *dependent_id = crm_element_value(xml_obj, +@@ -675,9 +676,9 @@ unpack_simple_colocation(xmlNode *xml_obj, const char *id, + score_i = char2score(score); + } + ++ flags = unpack_influence(id, dependent, influence_s); + pcmk__new_colocation(id, attr, score_i, dependent, primary, +- dependent_role, primary_role, +- unpack_influence(id, dependent, influence_s)); ++ dependent_role, primary_role, flags); + } + + // \return Standard Pacemaker return code +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index c1392e07a4c..72f088a2709 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -168,10 +168,16 @@ member_internal_constraints(gpointer data, gpointer user_data) + } + + } else if (member_data->colocated) { ++ uint32_t flags = pcmk__coloc_none; ++ ++ if (pcmk_is_set(member->flags, pe_rsc_critical)) { ++ flags |= pcmk__coloc_influence; ++ } ++ + // Colocate this member with the previous one + pcmk__new_colocation("group:internal_colocation", NULL, INFINITY, + member, member_data->previous_member, NULL, NULL, +- pcmk_is_set(member->flags, pe_rsc_critical)); ++ 
flags); + } + + if (member_data->promotable) { +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index 4e3eca3e18a..ff7052f6c79 100644 +--- a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -999,7 +999,8 @@ pcmk__primitive_internal_constraints(pe_resource_t *rsc) + score = INFINITY; /* Force them to run on the same host */ + } + pcmk__new_colocation("resource-with-container", NULL, score, rsc, +- rsc->container, NULL, NULL, true); ++ rsc->container, NULL, NULL, ++ pcmk__coloc_influence); + } + } + + +From 2f8d4186e16fb026176f1ddb774eb38940c90390 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 12:33:49 -0500 +Subject: [PATCH 04/17] Refactor: scheduler: prefix all internal colocation IDs + with "#" + +... to ensure they're easily distinguished from user-configured colocations +in log messages. +--- + lib/pacemaker/pcmk_sched_bundle.c | 6 +++--- + lib/pacemaker/pcmk_sched_group.c | 5 ++--- + lib/pacemaker/pcmk_sched_primitive.c | 2 +- + 3 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c +index ca3c21a9977..b4beb0d488f 100644 +--- a/lib/pacemaker/pcmk_sched_bundle.c ++++ b/lib/pacemaker/pcmk_sched_bundle.c +@@ -80,7 +80,7 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + * host because pacemaker-remoted only supports a single + * active connection + */ +- pcmk__new_colocation("child-remote-with-docker-remote", NULL, ++ pcmk__new_colocation("#replica-remote-with-host-remote", NULL, + INFINITY, replica->remote, + container_host->details->remote_rsc, NULL, + NULL, pcmk__coloc_influence); +@@ -251,14 +251,14 @@ pcmk__bundle_internal_constraints(pe_resource_t *rsc) + pcmk__order_stops(replica->container, replica->ip, + pe_order_implies_first|pe_order_preserve); + +- pcmk__new_colocation("ip-with-docker", NULL, INFINITY, replica->ip, ++ 
pcmk__new_colocation("#ip-with-container", NULL, INFINITY, replica->ip, + replica->container, NULL, NULL, + pcmk__coloc_influence); + } + + if (replica->remote) { + /* This handles ordering and colocating remote relative to container +- * (via "resource-with-container"). Since IP is also ordered and ++ * (via "#resource-with-container"). Since IP is also ordered and + * colocated relative to the container, we don't need to do anything + * explicit here with IP. + */ +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index 72f088a2709..1b6c5c416ab 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -175,9 +175,8 @@ member_internal_constraints(gpointer data, gpointer user_data) + } + + // Colocate this member with the previous one +- pcmk__new_colocation("group:internal_colocation", NULL, INFINITY, +- member, member_data->previous_member, NULL, NULL, +- flags); ++ pcmk__new_colocation("#group-members", NULL, INFINITY, member, ++ member_data->previous_member, NULL, NULL, flags); + } + + if (member_data->promotable) { +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index ff7052f6c79..d6b39e38c5f 100644 +--- a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -998,7 +998,7 @@ pcmk__primitive_internal_constraints(pe_resource_t *rsc) + } else { + score = INFINITY; /* Force them to run on the same host */ + } +- pcmk__new_colocation("resource-with-container", NULL, score, rsc, ++ pcmk__new_colocation("#resource-with-container", NULL, score, rsc, + rsc->container, NULL, NULL, + pcmk__coloc_influence); + } + +From 93230be27fb4c156a1cc15daf161e2206961421e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 16:25:02 -0500 +Subject: [PATCH 05/17] Refactor: scheduler: don't use macro for finding + constraint resource + +It obscured what was happening +--- + lib/pacemaker/pcmk_sched_colocation.c | 105 
++++++++++++++++++++------ + 1 file changed, 81 insertions(+), 24 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index dbdefadfd10..4d8fe74c206 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -21,14 +21,6 @@ + #include "crm/msg_xml.h" + #include "libpacemaker_private.h" + +-#define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ +- __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \ +- if (__rsc == NULL) { \ +- pcmk__config_err("%s: No resource found for %s", __set, __name); \ +- return; \ +- } \ +- } while(0) +- + // Used to temporarily mark a node as unusable + #define INFINITY_HACK (INFINITY * -100) + +@@ -411,6 +403,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + int local_score = score; + bool sequential = false; + uint32_t flags = pcmk__coloc_none; ++ const char *xml_rsc_id = NULL; + const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); + + if (score_s) { +@@ -434,7 +427,14 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +- EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); ++ xml_rsc_id = ID(xml_rsc); ++ resource = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (resource == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ set_id, xml_rsc_id); ++ return; ++ } + if (with != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); + flags = unpack_influence(coloc_id, resource, influence_s); +@@ -450,7 +450,14 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +- EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); ++ 
xml_rsc_id = ID(xml_rsc); ++ resource = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (resource == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ set_id, xml_rsc_id); ++ return; ++ } + if (last != NULL) { + pe_rsc_trace(resource, "Colocating %s with %s", + last->id, resource->id); +@@ -472,18 +479,30 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + + xmlNode *xml_rsc_with = NULL; + +- EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); +- ++ xml_rsc_id = ID(xml_rsc); ++ resource = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (resource == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ set_id, xml_rsc_id); ++ return; ++ } + flags = unpack_influence(coloc_id, resource, influence_s); + for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc_with != NULL; + xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { + +- if (pcmk__str_eq(resource->id, ID(xml_rsc_with), +- pcmk__str_casei)) { ++ xml_rsc_id = ID(xml_rsc_with); ++ if (pcmk__str_eq(resource->id, xml_rsc_id, pcmk__str_none)) { + break; + } +- EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with)); ++ with = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (with == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ set_id, xml_rsc_id); ++ return; ++ } + pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, + with->id); + pcmk__new_colocation(set_id, NULL, local_score, +@@ -501,6 +520,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + pe_resource_t *rsc_1 = NULL; + pe_resource_t *rsc_2 = NULL; + ++ const char *xml_rsc_id = NULL; + const char *role_1 = crm_element_value(set1, "role"); + const char *role_2 = crm_element_value(set2, "role"); + +@@ -519,21 +539,30 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + // Get the first one + xml_rsc = first_named_child(set1, 
XML_TAG_RESOURCE_REF); + if (xml_rsc != NULL) { +- EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); ++ xml_rsc_id = ID(xml_rsc); ++ rsc_1 = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (rsc_1 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ id, xml_rsc_id); ++ return; ++ } + } + } + + rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential); + if (rc != pcmk_rc_ok || sequential) { + // Get the last one +- const char *rid = NULL; +- + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +- rid = ID(xml_rsc); ++ xml_rsc_id = ID(xml_rsc); ++ } ++ rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); ++ if (rsc_2 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", id, xml_rsc_id); ++ return; + } +- EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); + } + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { +@@ -546,7 +575,14 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +- EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); ++ xml_rsc_id = ID(xml_rsc); ++ rsc_2 = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (rsc_2 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ id, xml_rsc_id); ++ return; ++ } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, flags); + } +@@ -555,7 +591,14 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +- EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); ++ xml_rsc_id = ID(xml_rsc); ++ rsc_1 = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (rsc_1 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ id, 
xml_rsc_id); ++ return; ++ } + flags = unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, flags); +@@ -567,14 +610,28 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + + xmlNode *xml_rsc_2 = NULL; + +- EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); ++ xml_rsc_id = ID(xml_rsc); ++ rsc_1 = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (rsc_1 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ id, xml_rsc_id); ++ return; ++ } + + flags = unpack_influence(id, rsc_1, influence_s); + for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc_2 != NULL; + xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { + +- EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); ++ xml_rsc_id = ID(xml_rsc_2); ++ rsc_2 = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (rsc_2 == NULL) { ++ pcmk__config_err("%s: No resource found for %s", ++ id, xml_rsc_id); ++ return; ++ } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, + role_1, role_2, flags); + } + +From 23393992a75905f6bd4636f71263c15338c1556f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 27 Jun 2023 10:15:19 -0500 +Subject: [PATCH 06/17] Refactor: scheduler: use bool for "group ordering" in + colocation sets + +... 
for readability +--- + lib/pacemaker/pcmk_sched_colocation.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 4d8fe74c206..4c8bca56e86 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -399,7 +399,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + pe_resource_t *resource = NULL; + const char *set_id = ID(set); + const char *role = crm_element_value(set, "role"); +- const char *ordering = crm_element_value(set, "ordering"); ++ bool with_previous = false; + int local_score = score; + bool sequential = false; + uint32_t flags = pcmk__coloc_none; +@@ -415,15 +415,18 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + return; + } + +- if (ordering == NULL) { +- ordering = "group"; ++ /* The "ordering" attribute specifies whether resources in a positive-score ++ * set are colocated with the previous or next resource. 
++ */ ++ if (pcmk__str_eq(crm_element_value(set, "ordering"), "group", ++ pcmk__str_null_matches|pcmk__str_casei)) { ++ with_previous = true; + } + + if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) { + return; + +- } else if ((local_score > 0) +- && pcmk__str_eq(ordering, "group", pcmk__str_casei)) { ++ } else if ((local_score > 0) && with_previous) { + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + + +From e42ec03e0fe488a80172e79b319a3084854332de Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 27 Jun 2023 10:18:22 -0500 +Subject: [PATCH 07/17] Refactor: scheduler: simplify unpacking a colocation + set (slightly) + +--- + lib/pacemaker/pcmk_sched_colocation.c | 56 ++++++++++----------------- + 1 file changed, 20 insertions(+), 36 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 4c8bca56e86..e8f01e49a27 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -395,7 +395,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + const char *influence_s, pe_working_set_t *data_set) + { + xmlNode *xml_rsc = NULL; +- pe_resource_t *with = NULL; ++ pe_resource_t *other = NULL; + pe_resource_t *resource = NULL; + const char *set_id = ID(set); + const char *role = crm_element_value(set, "role"); +@@ -426,30 +426,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) { + return; + +- } else if ((local_score > 0) && with_previous) { +- for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); +- xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { +- +- xml_rsc_id = ID(xml_rsc); +- resource = pcmk__find_constraint_resource(data_set->resources, +- xml_rsc_id); +- if (resource == NULL) { +- pcmk__config_err("%s: No 
resource found for %s", +- set_id, xml_rsc_id); +- return; +- } +- if (with != NULL) { +- pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); +- flags = unpack_influence(coloc_id, resource, influence_s); +- pcmk__new_colocation(set_id, NULL, local_score, resource, +- with, role, role, flags); +- } +- with = resource; +- } +- + } else if (local_score > 0) { +- pe_resource_t *last = NULL; +- + for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +@@ -461,14 +438,21 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + set_id, xml_rsc_id); + return; + } +- if (last != NULL) { +- pe_rsc_trace(resource, "Colocating %s with %s", +- last->id, resource->id); ++ if (other != NULL) { + flags = unpack_influence(coloc_id, resource, influence_s); +- pcmk__new_colocation(set_id, NULL, local_score, last, +- resource, role, role, flags); ++ if (with_previous) { ++ pe_rsc_trace(resource, "Colocating %s with %s in set %s", ++ resource->id, other->id, set_id); ++ pcmk__new_colocation(set_id, NULL, local_score, resource, ++ other, role, role, flags); ++ } else { ++ pe_rsc_trace(resource, "Colocating %s with %s in set %s", ++ other->id, resource->id, set_id); ++ pcmk__new_colocation(set_id, NULL, local_score, other, ++ resource, role, role, flags); ++ } + } +- last = resource; ++ other = resource; + } + + } else { +@@ -499,17 +483,17 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + if (pcmk__str_eq(resource->id, xml_rsc_id, pcmk__str_none)) { + break; + } +- with = pcmk__find_constraint_resource(data_set->resources, +- xml_rsc_id); +- if (with == NULL) { ++ other = pcmk__find_constraint_resource(data_set->resources, ++ xml_rsc_id); ++ if (other == NULL) { + pcmk__config_err("%s: No resource found for %s", + set_id, xml_rsc_id); + return; + } +- pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, +- with->id); ++ pe_rsc_trace(resource, 
"Anti-Colocating %s with %s", ++ resource->id, other->id); + pcmk__new_colocation(set_id, NULL, local_score, +- resource, with, role, role, flags); ++ resource, other, role, role, flags); + } + } + } + +From a26ebb380b4bcf1f4fb8a2d69d4b8c8af306dfec Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 14:56:53 -0500 +Subject: [PATCH 08/17] Feature: CIB: deprecate "ordering" attribute of + "resource_set" + +It's undocumented, and makes sets even more confusing than they already are, +especially since it only applies when the score is positive. +--- + include/crm/pengine/internal.h | 1 + + lib/pacemaker/pcmk_sched_colocation.c | 9 +++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 1b5f6f1d8d9..53cbb54de5e 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -170,6 +170,7 @@ enum pe_warn_once_e { + pe_wo_group_coloc = (1 << 12), + pe_wo_upstart = (1 << 13), + pe_wo_nagios = (1 << 14), ++ pe_wo_set_ordering = (1 << 15), + }; + + extern uint32_t pe_wo; +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index e8f01e49a27..36558f38c4e 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -415,12 +415,17 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + return; + } + +- /* The "ordering" attribute specifies whether resources in a positive-score +- * set are colocated with the previous or next resource. ++ /* @COMPAT The deprecated "ordering" attribute specifies whether resources ++ * in a positive-score set are colocated with the previous or next resource. 
+ */ + if (pcmk__str_eq(crm_element_value(set, "ordering"), "group", + pcmk__str_null_matches|pcmk__str_casei)) { + with_previous = true; ++ } else { ++ pe_warn_once(pe_wo_set_ordering, ++ "Support for 'ordering' other than 'group' in " ++ XML_CONS_TAG_RSC_SET " (such as %s) is deprecated and " ++ "will be removed in a future release", set_id); + } + + if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) { + +From f18f365c0995df68599ec2c241f81bae54d2bd38 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 15:05:21 -0500 +Subject: [PATCH 09/17] Log: scheduler: improve logs when unpacking colocation + sets + +--- + lib/pacemaker/pcmk_sched_colocation.c | 54 +++++++++++++++++---------- + 1 file changed, 34 insertions(+), 20 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 36558f38c4e..7555afbc522 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -439,8 +439,9 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + resource = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (resource == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- set_id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring %s and later resources in set %s: " ++ "No such resource", xml_rsc_id, set_id); + return; + } + if (other != NULL) { +@@ -475,8 +476,9 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + resource = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (resource == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- set_id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring %s and later resources in set %s: " ++ "No such resource", xml_rsc_id, set_id); + return; + } + flags = unpack_influence(coloc_id, resource, 
influence_s); +@@ -490,11 +492,7 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + } + other = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); +- if (other == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- set_id, xml_rsc_id); +- return; +- } ++ CRM_ASSERT(other != NULL); // We already processed it + pe_rsc_trace(resource, "Anti-Colocating %s with %s", + resource->id, other->id); + pcmk__new_colocation(set_id, NULL, local_score, +@@ -527,7 +525,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + } + + rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential); +- if (rc != pcmk_rc_ok || sequential) { ++ if ((rc != pcmk_rc_ok) || sequential) { + // Get the first one + xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + if (xml_rsc != NULL) { +@@ -535,15 +533,17 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + rsc_1 = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (rsc_1 == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring colocation of set %s with set %s " ++ "because first resource %s not found", ++ ID(set1), ID(set2), xml_rsc_id); + return; + } + } + } + + rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential); +- if (rc != pcmk_rc_ok || sequential) { ++ if ((rc != pcmk_rc_ok) || sequential) { + // Get the last one + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { +@@ -552,7 +552,10 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + } + rsc_2 = pcmk__find_constraint_resource(data_set->resources, xml_rsc_id); + if (rsc_2 == NULL) { +- pcmk__config_err("%s: No resource found for %s", id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring colocation 
of set %s with set %s " ++ "because last resource %s not found", ++ ID(set1), ID(set2), xml_rsc_id); + return; + } + } +@@ -573,6 +576,10 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + if (rsc_2 == NULL) { + pcmk__config_err("%s: No resource found for %s", + id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring resource %s and later in set %s " ++ "for colocation with set %s: No such resource", ++ xml_rsc_id, set2, set1); + return; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, +@@ -587,8 +594,10 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + rsc_1 = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (rsc_1 == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring resource %s and later in set %s " ++ "for colocation with set %s: No such resource", ++ xml_rsc_id, set1, set2); + return; + } + flags = unpack_influence(id, rsc_1, influence_s); +@@ -606,8 +615,10 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + rsc_1 = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (rsc_1 == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring resource %s and later in set %s " ++ "for colocation with set %s: No such resource", ++ xml_rsc_id, set1, set2); + return; + } + +@@ -620,8 +631,11 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + rsc_2 = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (rsc_2 == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- id, xml_rsc_id); ++ // Should be possible only with validation disabled ++ pcmk__config_err("Ignoring resource %s and later in set %s " ++ "for 
colocation with %s in set %s: " ++ "No such resource", ++ xml_rsc_id, set2, ID(xml_rsc), set1); + return; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, + +From 19e9a9d3b30e857f98459b7f5c4f4938e48e4261 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 26 Jun 2023 16:25:17 -0500 +Subject: [PATCH 10/17] Refactor: scheduler: mark explicitly configured + colocations + +--- + lib/pacemaker/libpacemaker_private.h | 3 +++ + lib/pacemaker/pcmk_sched_colocation.c | 18 +++++++++++------- + 2 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index 51de9d3e9a9..a49d55d3c41 100644 +--- a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -22,6 +22,9 @@ enum pcmk__coloc_flags { + + // Primary is affected even if already active + pcmk__coloc_influence = (1U << 0), ++ ++ // Colocation was explicitly configured in CIB ++ pcmk__coloc_explicit = (1U << 1), + }; + + // Flags to modify the behavior of add_colocated_node_scores() +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 7555afbc522..e0b39b59e81 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -445,7 +445,8 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + return; + } + if (other != NULL) { +- flags = unpack_influence(coloc_id, resource, influence_s); ++ flags = pcmk__coloc_explicit ++ | unpack_influence(coloc_id, resource, influence_s); + if (with_previous) { + pe_rsc_trace(resource, "Colocating %s with %s in set %s", + resource->id, other->id, set_id); +@@ -481,7 +482,8 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + "No such resource", xml_rsc_id, set_id); + return; + } +- flags = unpack_influence(coloc_id, resource, influence_s); ++ flags = pcmk__coloc_explicit ++ | unpack_influence(coloc_id, resource, influence_s); + for (xml_rsc_with = 
first_named_child(set, XML_TAG_RESOURCE_REF); + xml_rsc_with != NULL; + xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { +@@ -561,12 +563,12 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + } + + if ((rsc_1 != NULL) && (rsc_2 != NULL)) { +- flags = unpack_influence(id, rsc_1, influence_s); ++ flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, + flags); + + } else if (rsc_1 != NULL) { +- flags = unpack_influence(id, rsc_1, influence_s); ++ flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +@@ -600,7 +602,8 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + xml_rsc_id, set1, set2); + return; + } +- flags = unpack_influence(id, rsc_1, influence_s); ++ flags = pcmk__coloc_explicit ++ | unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, flags); + } +@@ -622,7 +625,8 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + return; + } + +- flags = unpack_influence(id, rsc_1, influence_s); ++ flags = pcmk__coloc_explicit ++ | unpack_influence(id, rsc_1, influence_s); + for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc_2 != NULL; + xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { +@@ -739,7 +743,7 @@ unpack_simple_colocation(xmlNode *xml_obj, const char *id, + score_i = char2score(score); + } + +- flags = unpack_influence(id, dependent, influence_s); ++ flags = pcmk__coloc_explicit | unpack_influence(id, dependent, influence_s); + pcmk__new_colocation(id, attr, score_i, dependent, primary, + dependent_role, primary_role, flags); + } + +From 4f9e2bc6fb1dd78d5784d918a85bb2028f01d265 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 27 Jun 2023 10:24:58 -0500 +Subject: 
[PATCH 11/17] Test: scheduler: add regression test for colocation + with an inner group member + +As of this commit, the behavior is incorrect. +--- + cts/cts-scheduler.in | 4 + + .../dot/coloc-with-inner-group-member.dot | 8 + + .../exp/coloc-with-inner-group-member.exp | 38 +++ + .../coloc-with-inner-group-member.scores | 46 ++++ + .../coloc-with-inner-group-member.summary | 33 +++ + .../xml/coloc-with-inner-group-member.xml | 258 ++++++++++++++++++ + 6 files changed, 387 insertions(+) + create mode 100644 cts/scheduler/dot/coloc-with-inner-group-member.dot + create mode 100644 cts/scheduler/exp/coloc-with-inner-group-member.exp + create mode 100644 cts/scheduler/scores/coloc-with-inner-group-member.scores + create mode 100644 cts/scheduler/summary/coloc-with-inner-group-member.summary + create mode 100644 cts/scheduler/xml/coloc-with-inner-group-member.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index ee0cb7b4722..de455105985 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -80,6 +80,10 @@ TESTS = [ + [ "group-dependents", "Account for the location preferences of things colocated with a group" ], + [ "group-stop-ordering", "Ensure blocked group member stop does not force other member stops" ], + [ "colocate-unmanaged-group", "Respect mandatory colocations even if earlier group member is unmanaged" ], ++ [ ++ "coloc-with-inner-group-member", ++ "Consider explicit colocations with inner group members" ++ ], + ], + [ + [ "rsc_dep1", "Must not" ], +diff --git a/cts/scheduler/dot/coloc-with-inner-group-member.dot b/cts/scheduler/dot/coloc-with-inner-group-member.dot +new file mode 100644 +index 00000000000..77e1a8e6e40 +--- /dev/null ++++ b/cts/scheduler/dot/coloc-with-inner-group-member.dot +@@ -0,0 +1,8 @@ ++ digraph "g" { ++"grp_stop_0" -> "grp_stopped_0" [ style = bold] ++"grp_stop_0" -> "vip_stop_0 rhel8-3" [ style = bold] ++"grp_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grp_stopped_0" [ style=bold 
color="green" fontcolor="orange"] ++"vip_stop_0 rhel8-3" -> "grp_stopped_0" [ style = bold] ++"vip_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/exp/coloc-with-inner-group-member.exp b/cts/scheduler/exp/coloc-with-inner-group-member.exp +new file mode 100644 +index 00000000000..e6d94d5fe7f +--- /dev/null ++++ b/cts/scheduler/exp/coloc-with-inner-group-member.exp +@@ -0,0 +1,38 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/scores/coloc-with-inner-group-member.scores b/cts/scheduler/scores/coloc-with-inner-group-member.scores +new file mode 100644 +index 00000000000..10fe944cb42 +--- /dev/null ++++ b/cts/scheduler/scores/coloc-with-inner-group-member.scores +@@ -0,0 +1,46 @@ ++ ++pcmk__group_assign: bar allocation score on rhel8-1: 0 ++pcmk__group_assign: bar allocation score on rhel8-2: 0 ++pcmk__group_assign: bar allocation score on rhel8-3: 0 ++pcmk__group_assign: bar allocation score on rhel8-4: 0 ++pcmk__group_assign: bar allocation score on rhel8-5: 0 ++pcmk__group_assign: foo allocation score on rhel8-1: 0 ++pcmk__group_assign: foo allocation score on rhel8-2: 0 ++pcmk__group_assign: foo allocation score on rhel8-3: 0 ++pcmk__group_assign: foo allocation score on rhel8-4: 0 ++pcmk__group_assign: foo allocation score on rhel8-5: 0 ++pcmk__group_assign: grp allocation score on rhel8-1: 0 ++pcmk__group_assign: grp allocation score on rhel8-2: 0 ++pcmk__group_assign: grp allocation score on rhel8-3: 0 ++pcmk__group_assign: grp allocation score on rhel8-4: 0 ++pcmk__group_assign: grp allocation score on rhel8-5: 0 ++pcmk__group_assign: vip allocation score on rhel8-1: 0 ++pcmk__group_assign: vip allocation score on rhel8-2: 0 ++pcmk__group_assign: vip allocation score on rhel8-3: 0 ++pcmk__group_assign: vip allocation score on rhel8-4: 0 ++pcmk__group_assign: vip allocation score on rhel8-5: 0 ++pcmk__primitive_assign: 
Fencing allocation score on rhel8-1: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 ++pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 ++pcmk__primitive_assign: bar allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: bar allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: bar allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: bar allocation score on rhel8-4: 0 ++pcmk__primitive_assign: bar allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: foo allocation score on rhel8-1: 0 ++pcmk__primitive_assign: foo allocation score on rhel8-2: 0 ++pcmk__primitive_assign: foo allocation score on rhel8-3: 0 ++pcmk__primitive_assign: foo allocation score on rhel8-4: 0 ++pcmk__primitive_assign: foo allocation score on rhel8-5: 0 ++pcmk__primitive_assign: vip allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: vip allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: vip allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: vip allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: vip allocation score on rhel8-5: -INFINITY ++pcmk__primitive_assign: vip-dep allocation score on rhel8-1: 0 ++pcmk__primitive_assign: vip-dep allocation score on rhel8-2: 0 ++pcmk__primitive_assign: vip-dep allocation score on rhel8-3: 0 ++pcmk__primitive_assign: vip-dep allocation score on rhel8-4: 0 ++pcmk__primitive_assign: vip-dep allocation score on rhel8-5: 0 +diff --git a/cts/scheduler/summary/coloc-with-inner-group-member.summary b/cts/scheduler/summary/coloc-with-inner-group-member.summary +new file mode 100644 +index 00000000000..3e87f0867ef +--- /dev/null ++++ b/cts/scheduler/summary/coloc-with-inner-group-member.summary +@@ -0,0 +1,33 @@ ++Using the original execution date of: 2023-06-20 20:45:06Z ++Current cluster status: ++ * Node 
List: ++ * Online: [ rhel8-1 rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-1 ++ * vip-dep (ocf:pacemaker:Dummy): Started rhel8-3 ++ * Resource Group: grp: ++ * foo (ocf:pacemaker:Dummy): Started rhel8-4 ++ * bar (ocf:pacemaker:Dummy): Started rhel8-4 ++ * vip (ocf:pacemaker:Dummy): Started rhel8-3 ++ ++Transition Summary: ++ * Stop vip ( rhel8-3 ) due to node availability ++ ++Executing Cluster Transition: ++ * Pseudo action: grp_stop_0 ++ * Resource action: vip stop on rhel8-3 ++ * Pseudo action: grp_stopped_0 ++Using the original execution date of: 2023-06-20 20:45:06Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ rhel8-1 rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started rhel8-1 ++ * vip-dep (ocf:pacemaker:Dummy): Started rhel8-3 ++ * Resource Group: grp: ++ * foo (ocf:pacemaker:Dummy): Started rhel8-4 ++ * bar (ocf:pacemaker:Dummy): Started rhel8-4 ++ * vip (ocf:pacemaker:Dummy): Stopped +diff --git a/cts/scheduler/xml/coloc-with-inner-group-member.xml b/cts/scheduler/xml/coloc-with-inner-group-member.xml +new file mode 100644 +index 00000000000..c07edecb81a +--- /dev/null ++++ b/cts/scheduler/xml/coloc-with-inner-group-member.xml +@@ -0,0 +1,258 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + +From 7fa4999f96d7541ee0dad248477c3e7d4affff00 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 20 Jun 2023 19:23:18 -0500 +Subject: [PATCH 12/17] Fix: scheduler: consider explicit colocations with + group members + +Previously, a group's colocations would include only colocations explicitly +with the group itself, and with its first member (for "group with" colocations) +or last member (for "with group" colocations). Explicit colocations with a +different group member could cause incorrect node assignment. + +Fixes T679 +--- + lib/pacemaker/pcmk_sched_group.c | 70 +++++++++++++++++++++------- + lib/pacemaker/pcmk_sched_primitive.c | 52 ++++++++++++++------- + 2 files changed, 90 insertions(+), 32 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index 1b6c5c416ab..95e2d77aa5f 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -674,16 +674,36 @@ pcmk__with_group_colocations(const pe_resource_t *rsc, + } + + /* "With this" colocations are needed only for the group itself and for its +- * last member. Add the group's colocations plus any relevant +- * parent colocations if cloned. ++ * last member. (Previous members will chain via the group internal ++ * colocations.) 
+ */ +- if ((rsc == orig_rsc) || (orig_rsc == pe__last_group_member(rsc))) { +- crm_trace("Adding 'with %s' colocations to list for %s", +- rsc->id, orig_rsc->id); +- pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); +- if (rsc->parent != NULL) { // Cloned group +- rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, +- list); ++ if ((orig_rsc != rsc) && (orig_rsc != pe__last_group_member(rsc))) { ++ return; ++ } ++ ++ pe_rsc_trace(rsc, "Adding 'with %s' colocations to list for %s", ++ rsc->id, orig_rsc->id); ++ ++ // Add the group's own colocations ++ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); ++ ++ // If cloned, add any relevant colocations with the clone ++ if (rsc->parent != NULL) { ++ rsc->parent->cmds->with_this_colocations(rsc->parent, orig_rsc, ++ list); ++ } ++ ++ if (!pe__group_flag_is_set(rsc, pe__group_colocated)) { ++ // @COMPAT Non-colocated groups are deprecated ++ return; ++ } ++ ++ // Add explicit colocations with the group's (other) children ++ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ pe_resource_t *member = iter->data; ++ ++ if (member != orig_rsc) { ++ member->cmds->with_this_colocations(member, orig_rsc, list); + } + } + } +@@ -693,6 +713,8 @@ void + pcmk__group_with_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) + { ++ const pe_resource_t *member = NULL; ++ + CRM_CHECK((rsc != NULL) && (rsc->variant == pe_group) + && (orig_rsc != NULL) && (list != NULL), + return); +@@ -702,18 +724,35 @@ pcmk__group_with_colocations(const pe_resource_t *rsc, + return; + } + +- /* Colocations for the group itself, or for its first member, consist of the +- * group's colocations plus any relevant parent colocations if cloned. ++ /* "This with" colocations are normally needed only for the group itself and ++ * for its first member. 
+ */ + if ((rsc == orig_rsc) + || (orig_rsc == (const pe_resource_t *) rsc->children->data)) { +- crm_trace("Adding '%s with' colocations to list for %s", +- rsc->id, orig_rsc->id); ++ pe_rsc_trace(rsc, "Adding '%s with' colocations to list for %s", ++ rsc->id, orig_rsc->id); ++ ++ // Add the group's own colocations + pcmk__add_this_with_list(list, rsc->rsc_cons); +- if (rsc->parent != NULL) { // Cloned group ++ ++ // If cloned, add any relevant colocations involving the clone ++ if (rsc->parent != NULL) { + rsc->parent->cmds->this_with_colocations(rsc->parent, orig_rsc, + list); + } ++ ++ if (!pe__group_flag_is_set(rsc, pe__group_colocated)) { ++ // @COMPAT Non-colocated groups are deprecated ++ return; ++ } ++ ++ // Add explicit colocations involving the group's (other) children ++ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ member = iter->data; ++ if (member != orig_rsc) { ++ member->cmds->this_with_colocations(member, orig_rsc, list); ++ } ++ } + return; + } + +@@ -723,8 +762,7 @@ pcmk__group_with_colocations(const pe_resource_t *rsc, + * happen, so the group's mandatory colocations must be explicitly added. 
+ */ + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { +- const pe_resource_t *member = (const pe_resource_t *) iter->data; +- ++ member = iter->data; + if (orig_rsc == member) { + break; // We've seen all earlier members, and none are unmanaged + } +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index d6b39e38c5f..bfc6fc7fedd 100644 +--- a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -1069,15 +1069,25 @@ void + pcmk__with_primitive_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) + { +- // Primitives don't have children, so rsc should also be orig_rsc +- CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native) +- && (rsc == orig_rsc) && (list != NULL), +- return); ++ CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (list != NULL)); + +- // Add primitive's own colocations plus any relevant ones from parent +- pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); +- if (rsc->parent != NULL) { +- rsc->parent->cmds->with_this_colocations(rsc->parent, rsc, list); ++ if (rsc == orig_rsc) { ++ /* For the resource itself, add all of its own colocations and relevant ++ * colocations from its parent (if any). 
++ */ ++ pcmk__add_with_this_list(list, rsc->rsc_cons_lhs); ++ if (rsc->parent != NULL) { ++ rsc->parent->cmds->with_this_colocations(rsc->parent, rsc, list); ++ } ++ } else { ++ // For an ancestor, add only explicitly configured constraints ++ for (GList *iter = rsc->rsc_cons_lhs; iter != NULL; iter = iter->next) { ++ pcmk__colocation_t *colocation = iter->data; ++ ++ if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { ++ pcmk__add_with_this(list, colocation); ++ } ++ } + } + } + +@@ -1088,15 +1098,25 @@ void + pcmk__primitive_with_colocations(const pe_resource_t *rsc, + const pe_resource_t *orig_rsc, GList **list) + { +- // Primitives don't have children, so rsc should also be orig_rsc +- CRM_CHECK((rsc != NULL) && (rsc->variant == pe_native) +- && (rsc == orig_rsc) && (list != NULL), +- return); ++ CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native) && (list != NULL)); + +- // Add primitive's own colocations plus any relevant ones from parent +- pcmk__add_this_with_list(list, rsc->rsc_cons); +- if (rsc->parent != NULL) { +- rsc->parent->cmds->this_with_colocations(rsc->parent, rsc, list); ++ if (rsc == orig_rsc) { ++ /* For the resource itself, add all of its own colocations and relevant ++ * colocations from its parent (if any). 
++ */ ++ pcmk__add_this_with_list(list, rsc->rsc_cons); ++ if (rsc->parent != NULL) { ++ rsc->parent->cmds->this_with_colocations(rsc->parent, rsc, list); ++ } ++ } else { ++ // For an ancestor, add only explicitly configured constraints ++ for (GList *iter = rsc->rsc_cons; iter != NULL; iter = iter->next) { ++ pcmk__colocation_t *colocation = iter->data; ++ ++ if (pcmk_is_set(colocation->flags, pcmk__coloc_explicit)) { ++ pcmk__add_this_with(list, colocation); ++ } ++ } + } + } + + +From e9e734eabf147a827c8bc6731da4c54b2a4d8658 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 27 Jun 2023 10:31:18 -0500 +Subject: [PATCH 13/17] Test: scheduler: update test output for group + colocation fix + +--- + .../dot/coloc-with-inner-group-member.dot | 32 ++++ + .../exp/coloc-with-inner-group-member.exp | 176 +++++++++++++++++- + .../coloc-with-inner-group-member.scores | 14 +- + .../coloc-with-inner-group-member.summary | 20 +- + 4 files changed, 225 insertions(+), 17 deletions(-) + +diff --git a/cts/scheduler/dot/coloc-with-inner-group-member.dot b/cts/scheduler/dot/coloc-with-inner-group-member.dot +index 77e1a8e6e40..a3bad7aab12 100644 +--- a/cts/scheduler/dot/coloc-with-inner-group-member.dot ++++ b/cts/scheduler/dot/coloc-with-inner-group-member.dot +@@ -1,8 +1,40 @@ + digraph "g" { ++"bar_monitor_10000 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"bar_start_0 rhel8-3" -> "bar_monitor_10000 rhel8-3" [ style = bold] ++"bar_start_0 rhel8-3" -> "grp_running_0" [ style = bold] ++"bar_start_0 rhel8-3" -> "vip_start_0 rhel8-3" [ style = bold] ++"bar_start_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"bar_stop_0 rhel8-4" -> "bar_start_0 rhel8-3" [ style = bold] ++"bar_stop_0 rhel8-4" -> "foo_stop_0 rhel8-4" [ style = bold] ++"bar_stop_0 rhel8-4" -> "grp_stopped_0" [ style = bold] ++"bar_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] ++"foo_monitor_10000 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"foo_start_0 rhel8-3" 
-> "bar_start_0 rhel8-3" [ style = bold] ++"foo_start_0 rhel8-3" -> "foo_monitor_10000 rhel8-3" [ style = bold] ++"foo_start_0 rhel8-3" -> "grp_running_0" [ style = bold] ++"foo_start_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"foo_stop_0 rhel8-4" -> "foo_start_0 rhel8-3" [ style = bold] ++"foo_stop_0 rhel8-4" -> "grp_stopped_0" [ style = bold] ++"foo_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] ++"grp_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp_start_0" -> "bar_start_0 rhel8-3" [ style = bold] ++"grp_start_0" -> "foo_start_0 rhel8-3" [ style = bold] ++"grp_start_0" -> "grp_running_0" [ style = bold] ++"grp_start_0" -> "vip_start_0 rhel8-3" [ style = bold] ++"grp_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp_stop_0" -> "bar_stop_0 rhel8-4" [ style = bold] ++"grp_stop_0" -> "foo_stop_0 rhel8-4" [ style = bold] + "grp_stop_0" -> "grp_stopped_0" [ style = bold] + "grp_stop_0" -> "vip_stop_0 rhel8-3" [ style = bold] + "grp_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grp_stopped_0" -> "grp_start_0" [ style = bold] + "grp_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"vip_monitor_10000 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"vip_start_0 rhel8-3" -> "grp_running_0" [ style = bold] ++"vip_start_0 rhel8-3" -> "vip_monitor_10000 rhel8-3" [ style = bold] ++"vip_start_0 rhel8-3" [ style=bold color="green" fontcolor="black"] ++"vip_stop_0 rhel8-3" -> "bar_stop_0 rhel8-4" [ style = bold] + "vip_stop_0 rhel8-3" -> "grp_stopped_0" [ style = bold] ++"vip_stop_0 rhel8-3" -> "vip_start_0 rhel8-3" [ style = bold] + "vip_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/exp/coloc-with-inner-group-member.exp b/cts/scheduler/exp/coloc-with-inner-group-member.exp +index e6d94d5fe7f..bb8f779feb1 100644 +--- a/cts/scheduler/exp/coloc-with-inner-group-member.exp ++++ b/cts/scheduler/exp/coloc-with-inner-group-member.exp +@@ -1,22 
+1,28 @@ + + + +- ++ + + + + + +- ++ + + +- ++ ++ ++ ++ ++ ++ ++ + + + + + +- ++ + + + +@@ -24,14 +30,172 @@ + + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + + + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/cts/scheduler/scores/coloc-with-inner-group-member.scores b/cts/scheduler/scores/coloc-with-inner-group-member.scores +index 10fe944cb42..8d1c6f621c1 100644 +--- a/cts/scheduler/scores/coloc-with-inner-group-member.scores ++++ b/cts/scheduler/scores/coloc-with-inner-group-member.scores +@@ -26,17 +26,17 @@ pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 + pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 + pcmk__primitive_assign: bar allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: bar allocation score on rhel8-2: -INFINITY +-pcmk__primitive_assign: bar allocation score on rhel8-3: -INFINITY +-pcmk__primitive_assign: bar allocation score on rhel8-4: 0 ++pcmk__primitive_assign: bar allocation score on rhel8-3: 0 ++pcmk__primitive_assign: bar allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: bar allocation score on rhel8-5: -INFINITY +-pcmk__primitive_assign: foo allocation score on rhel8-1: 0 +-pcmk__primitive_assign: foo allocation score on rhel8-2: 0 ++pcmk__primitive_assign: foo allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: foo allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: foo allocation score on rhel8-3: 0 +-pcmk__primitive_assign: foo allocation score on rhel8-4: 0 +-pcmk__primitive_assign: foo allocation score on rhel8-5: 0 
++pcmk__primitive_assign: foo allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: foo allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: vip allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: vip allocation score on rhel8-2: -INFINITY +-pcmk__primitive_assign: vip allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: vip allocation score on rhel8-3: 0 + pcmk__primitive_assign: vip allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: vip allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: vip-dep allocation score on rhel8-1: 0 +diff --git a/cts/scheduler/summary/coloc-with-inner-group-member.summary b/cts/scheduler/summary/coloc-with-inner-group-member.summary +index 3e87f0867ef..6659721a79c 100644 +--- a/cts/scheduler/summary/coloc-with-inner-group-member.summary ++++ b/cts/scheduler/summary/coloc-with-inner-group-member.summary +@@ -12,12 +12,24 @@ Current cluster status: + * vip (ocf:pacemaker:Dummy): Started rhel8-3 + + Transition Summary: +- * Stop vip ( rhel8-3 ) due to node availability ++ * Move foo ( rhel8-4 -> rhel8-3 ) ++ * Move bar ( rhel8-4 -> rhel8-3 ) ++ * Restart vip ( rhel8-3 ) due to required bar start + + Executing Cluster Transition: + * Pseudo action: grp_stop_0 + * Resource action: vip stop on rhel8-3 ++ * Resource action: bar stop on rhel8-4 ++ * Resource action: foo stop on rhel8-4 + * Pseudo action: grp_stopped_0 ++ * Pseudo action: grp_start_0 ++ * Resource action: foo start on rhel8-3 ++ * Resource action: bar start on rhel8-3 ++ * Resource action: vip start on rhel8-3 ++ * Resource action: vip monitor=10000 on rhel8-3 ++ * Pseudo action: grp_running_0 ++ * Resource action: foo monitor=10000 on rhel8-3 ++ * Resource action: bar monitor=10000 on rhel8-3 + Using the original execution date of: 2023-06-20 20:45:06Z + + Revised Cluster Status: +@@ -28,6 +40,6 @@ Revised Cluster Status: + * Fencing (stonith:fence_xvm): Started rhel8-1 + * vip-dep (ocf:pacemaker:Dummy): 
Started rhel8-3 + * Resource Group: grp: +- * foo (ocf:pacemaker:Dummy): Started rhel8-4 +- * bar (ocf:pacemaker:Dummy): Started rhel8-4 +- * vip (ocf:pacemaker:Dummy): Stopped ++ * foo (ocf:pacemaker:Dummy): Started rhel8-3 ++ * bar (ocf:pacemaker:Dummy): Started rhel8-3 ++ * vip (ocf:pacemaker:Dummy): Started rhel8-3 + +From 9ada709b568cf5050f768b83e4682a8b93d1b361 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 29 Jun 2023 09:01:41 -0500 +Subject: [PATCH 14/17] Fix: CIB: be more strict about ignoring colocation + elements without an ID + +Callers of pcmk__unpack_colocation() have more context about the element being +unpacked, so the checks are done there. +--- + lib/pacemaker/pcmk_sched_colocation.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index e0b39b59e81..a2baddbbb5c 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -886,23 +886,30 @@ pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) + xmlNode *expanded_xml = NULL; + + const char *id = crm_element_value(xml_obj, XML_ATTR_ID); +- const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); +- const char *influence_s = crm_element_value(xml_obj, +- XML_COLOC_ATTR_INFLUENCE); ++ const char *score = NULL; ++ const char *influence_s = NULL; + +- if (score) { +- score_i = char2score(score); ++ if (pcmk__str_empty(id)) { ++ pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_DEPEND ++ " without " CRM_ATTR_ID); ++ return; + } + + if (unpack_colocation_tags(xml_obj, &expanded_xml, + data_set) != pcmk_rc_ok) { + return; + } +- if (expanded_xml) { ++ if (expanded_xml != NULL) { + orig_xml = xml_obj; + xml_obj = expanded_xml; + } + ++ score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); ++ if (score != NULL) { ++ score_i = char2score(score); ++ } ++ influence_s = crm_element_value(xml_obj, 
XML_COLOC_ATTR_INFLUENCE); ++ + for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; + set = crm_next_same_xml(set)) { + +@@ -914,6 +921,11 @@ pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) + return; + } + ++ if (pcmk__str_empty(ID(set))) { ++ pcmk__config_err("Ignoring " XML_CONS_TAG_RSC_SET ++ " without " CRM_ATTR_ID); ++ continue; ++ } + unpack_colocation_set(set, score_i, id, influence_s, data_set); + + if (last != NULL) { + +From e830a9663c80ea348eff694a8e71a1e07d380690 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 29 Jun 2023 09:40:57 -0500 +Subject: [PATCH 15/17] Log: scheduler: improve colocation unpacking messages + (and comments) + +--- + lib/pacemaker/pcmk_sched_colocation.c | 60 ++++++++++++++------------- + 1 file changed, 32 insertions(+), 28 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index a2baddbbb5c..9c9195ed02c 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -136,13 +136,13 @@ pcmk__add_this_with(GList **list, const pcmk__colocation_t *colocation) + { + CRM_ASSERT((list != NULL) && (colocation != NULL)); + +- crm_trace("Adding colocation %s (%s with %s%s%s @%d) " ++ crm_trace("Adding colocation %s (%s with %s%s%s @%s) " + "to 'this with' list", + colocation->id, colocation->dependent->id, + colocation->primary->id, + (colocation->node_attribute == NULL)? 
"" : " using ", + pcmk__s(colocation->node_attribute, ""), +- colocation->score); ++ pcmk_readable_score(colocation->score)); + *list = g_list_insert_sorted(*list, (gpointer) colocation, + cmp_primary_priority); + } +@@ -187,13 +187,13 @@ pcmk__add_with_this(GList **list, const pcmk__colocation_t *colocation) + { + CRM_ASSERT((list != NULL) && (colocation != NULL)); + +- crm_trace("Adding colocation %s (%s with %s%s%s @%d) " ++ crm_trace("Adding colocation %s (%s with %s%s%s @%s) " + "to 'with this' list", + colocation->id, colocation->dependent->id, + colocation->primary->id, + (colocation->node_attribute == NULL)? "" : " using ", + pcmk__s(colocation->node_attribute, ""), +- colocation->score); ++ pcmk_readable_score(colocation->score)); + *list = g_list_insert_sorted(*list, (gpointer) colocation, + cmp_dependent_priority); + } +@@ -339,10 +339,6 @@ pcmk__new_colocation(const char *id, const char *node_attr, int score, + node_attr = CRM_ATTR_UNAME; + } + +- pe_rsc_trace(dependent, "Added colocation %s (%s with %s @%s using %s)", +- new_con->id, dependent->id, primary->id, +- pcmk_readable_score(score), node_attr); +- + pcmk__add_this_with(&(dependent->rsc_cons), new_con); + pcmk__add_with_this(&(primary->rsc_cons_lhs), new_con); + +@@ -495,8 +491,6 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + other = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + CRM_ASSERT(other != NULL); // We already processed it +- pe_rsc_trace(resource, "Anti-Colocating %s with %s", +- resource->id, other->id); + pcmk__new_colocation(set_id, NULL, local_score, + resource, other, role, role, flags); + } +@@ -504,9 +498,21 @@ unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, + } + } + ++/*! 
++ * \internal ++ * \brief Colocate two resource sets relative to each other ++ * ++ * \param[in] id Colocation XML ID ++ * \param[in] set1 Dependent set ++ * \param[in] set2 Primary set ++ * \param[in] score Colocation score ++ * \param[in] influence_s Value of colocation's "influence" attribute ++ * \param[in,out] data_set Cluster working set ++ */ + static void +-colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, +- const char *influence_s, pe_working_set_t *data_set) ++colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, ++ int score, const char *influence_s, ++ pe_working_set_t *data_set) + { + xmlNode *xml_rsc = NULL; + pe_resource_t *rsc_1 = NULL; +@@ -521,8 +527,8 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + uint32_t flags = pcmk__coloc_none; + + if (score == 0) { +- crm_trace("Ignoring colocation '%s' between sets because score is 0", +- id); ++ crm_trace("Ignoring colocation '%s' between sets %s and %s " ++ "because score is 0", id, ID(set1), ID(set2)); + return; + } + +@@ -562,12 +568,12 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + } + } + +- if ((rsc_1 != NULL) && (rsc_2 != NULL)) { ++ if ((rsc_1 != NULL) && (rsc_2 != NULL)) { // Both sets are sequential + flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, + flags); + +- } else if (rsc_1 != NULL) { ++ } else if (rsc_1 != NULL) { // Only set1 is sequential + flags = pcmk__coloc_explicit | unpack_influence(id, rsc_1, influence_s); + for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { +@@ -576,19 +582,17 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + rsc_2 = pcmk__find_constraint_resource(data_set->resources, + xml_rsc_id); + if (rsc_2 == NULL) { +- pcmk__config_err("%s: No resource found for %s", +- 
id, xml_rsc_id); + // Should be possible only with validation disabled +- pcmk__config_err("Ignoring resource %s and later in set %s " +- "for colocation with set %s: No such resource", +- xml_rsc_id, set2, set1); ++ pcmk__config_err("Ignoring set %s colocation with resource %s " ++ "and later in set %s: No such resource", ++ ID(set1), xml_rsc_id, ID(set2)); + return; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, flags); + } + +- } else if (rsc_2 != NULL) { ++ } else if (rsc_2 != NULL) { // Only set2 is sequential + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +@@ -599,7 +603,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + // Should be possible only with validation disabled + pcmk__config_err("Ignoring resource %s and later in set %s " + "for colocation with set %s: No such resource", +- xml_rsc_id, set1, set2); ++ xml_rsc_id, ID(set1), ID(set2)); + return; + } + flags = pcmk__coloc_explicit +@@ -608,7 +612,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + role_2, flags); + } + +- } else { ++ } else { // Neither set is sequential + for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); + xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { + +@@ -621,7 +625,7 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + // Should be possible only with validation disabled + pcmk__config_err("Ignoring resource %s and later in set %s " + "for colocation with set %s: No such resource", +- xml_rsc_id, set1, set2); ++ xml_rsc_id, ID(set1), ID(set2)); + return; + } + +@@ -636,10 +640,10 @@ colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, + xml_rsc_id); + if (rsc_2 == NULL) { + // Should be possible only with validation disabled +- pcmk__config_err("Ignoring resource %s and later in set %s " +- "for colocation with %s in set %s: " ++ 
pcmk__config_err("Ignoring set %s resource %s colocation with " ++ "resource %s and later in set %s: " + "No such resource", +- xml_rsc_id, set2, ID(xml_rsc), set1); ++ ID(set1), ID(xml_rsc), xml_rsc_id, ID(set2)); + return; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, + +From 737d74b656cad7b5514397bb461b8a18fb5590df Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 29 Jun 2023 09:49:13 -0500 +Subject: [PATCH 16/17] Low: scheduler: continue with non-sequential set + members after error + +--- + lib/pacemaker/pcmk_sched_colocation.c | 30 +++++++++++++-------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c +index 9c9195ed02c..3e094a4b87b 100644 +--- a/lib/pacemaker/pcmk_sched_colocation.c ++++ b/lib/pacemaker/pcmk_sched_colocation.c +@@ -584,9 +584,9 @@ colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, + if (rsc_2 == NULL) { + // Should be possible only with validation disabled + pcmk__config_err("Ignoring set %s colocation with resource %s " +- "and later in set %s: No such resource", ++ "in set %s: No such resource", + ID(set1), xml_rsc_id, ID(set2)); +- return; ++ continue; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, + role_2, flags); +@@ -601,10 +601,10 @@ colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, + xml_rsc_id); + if (rsc_1 == NULL) { + // Should be possible only with validation disabled +- pcmk__config_err("Ignoring resource %s and later in set %s " +- "for colocation with set %s: No such resource", +- xml_rsc_id, ID(set1), ID(set2)); +- return; ++ pcmk__config_err("Ignoring colocation of set %s resource %s " ++ "with set %s: No such resource", ++ ID(set1), xml_rsc_id, ID(set2)); ++ continue; + } + flags = pcmk__coloc_explicit + | unpack_influence(id, rsc_1, influence_s); +@@ -623,10 +623,10 @@ colocate_rsc_sets(const char *id, const xmlNode *set1, const 
xmlNode *set2, + xml_rsc_id); + if (rsc_1 == NULL) { + // Should be possible only with validation disabled +- pcmk__config_err("Ignoring resource %s and later in set %s " +- "for colocation with set %s: No such resource", +- xml_rsc_id, ID(set1), ID(set2)); +- return; ++ pcmk__config_err("Ignoring colocation of set %s resource %s " ++ "with set %s: No such resource", ++ ID(set1), xml_rsc_id, ID(set2)); ++ continue; + } + + flags = pcmk__coloc_explicit +@@ -640,11 +640,11 @@ colocate_rsc_sets(const char *id, const xmlNode *set1, const xmlNode *set2, + xml_rsc_id); + if (rsc_2 == NULL) { + // Should be possible only with validation disabled +- pcmk__config_err("Ignoring set %s resource %s colocation with " +- "resource %s and later in set %s: " +- "No such resource", +- ID(set1), ID(xml_rsc), xml_rsc_id, ID(set2)); +- return; ++ pcmk__config_err("Ignoring colocation of set %s resource " ++ "%s with set %s resource %s: No such " ++ "resource", ID(set1), ID(xml_rsc), ++ ID(set2), xml_rsc_id); ++ continue; + } + pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, + role_1, role_2, flags); + +From d9c8593f17975371e64e0c187bc8234e901349a9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 29 Jun 2023 09:49:55 -0500 +Subject: [PATCH 17/17] Refactor: scheduler: make some variables const that can + be + +--- + lib/pacemaker/pcmk_sched_group.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index 95e2d77aa5f..a2bf5f6dcd4 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -699,8 +699,8 @@ pcmk__with_group_colocations(const pe_resource_t *rsc, + } + + // Add explicit colocations with the group's (other) children +- for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { +- pe_resource_t *member = iter->data; ++ for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ const pe_resource_t *member = 
iter->data; + + if (member != orig_rsc) { + member->cmds->with_this_colocations(member, orig_rsc, list); +@@ -747,7 +747,8 @@ pcmk__group_with_colocations(const pe_resource_t *rsc, + } + + // Add explicit colocations involving the group's (other) children +- for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ for (const GList *iter = rsc->children; ++ iter != NULL; iter = iter->next) { + member = iter->data; + if (member != orig_rsc) { + member->cmds->this_with_colocations(member, orig_rsc, list); +@@ -761,7 +762,7 @@ pcmk__group_with_colocations(const pe_resource_t *rsc, + * However, if an earlier group member is unmanaged, this chaining will not + * happen, so the group's mandatory colocations must be explicitly added. + */ +- for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { + member = iter->data; + if (orig_rsc == member) { + break; // We've seen all earlier members, and none are unmanaged diff --git a/SOURCES/002-remote-regression.patch b/SOURCES/002-remote-regression.patch deleted file mode 100644 index 0f0bea8..0000000 --- a/SOURCES/002-remote-regression.patch +++ /dev/null @@ -1,98 +0,0 @@ -From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 23 Jan 2023 14:25:56 -0600 -Subject: [PATCH] High: executor: fix regression in remote node shutdown - -This reverts the essential part of d61494347, which was based on misdiagnosing -a remote node shutdown issue. Initially, it was thought that a "TLS server -session ended" log just after a remote node requested shutdown indicated that -the proxy connection coincidentally dropped at that moment. It actually is the -routine stopping of accepting new proxy connections, and existing when that -happens makes the remote node exit immediately without waiting for the -all-clear from the cluster. 
- -Fixes T361 ---- - daemons/execd/pacemaker-execd.c | 19 +------------------ - daemons/execd/pacemaker-execd.h | 3 +-- - daemons/execd/remoted_tls.c | 6 +----- - 3 files changed, 3 insertions(+), 25 deletions(-) - -diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c -index db12674f13..491808974a 100644 ---- a/daemons/execd/pacemaker-execd.c -+++ b/daemons/execd/pacemaker-execd.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -305,23 +305,6 @@ lrmd_exit(gpointer data) - return FALSE; - } - --/*! -- * \internal -- * \brief Clean up and exit if shutdown has started -- * -- * \return Doesn't return -- */ --void --execd_exit_if_shutting_down(void) --{ --#ifdef PCMK__COMPILE_REMOTE -- if (shutting_down) { -- crm_warn("exit because TLS connection was closed and 'shutting_down' set"); -- lrmd_exit(NULL); -- } --#endif --} -- - /*! - * \internal - * \brief Request cluster shutdown if appropriate, otherwise exit immediately -diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h -index 6646ae29e3..f78e8dcdde 100644 ---- a/daemons/execd/pacemaker-execd.h -+++ b/daemons/execd/pacemaker-execd.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp); - int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, - xmlNode *request); - void lrmd_drain_alerts(GMainLoop *mloop); --void execd_exit_if_shutting_down(void); - - #endif // PACEMAKER_EXECD__H -diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c -index 6f4b2d0062..c65e3f394d 100644 ---- a/daemons/execd/remoted_tls.c -+++ b/daemons/execd/remoted_tls.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2012-2022 the Pacemaker project contributors -+ * Copyright 2012-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -250,10 +250,6 @@ static void - tls_server_dropped(gpointer user_data) - { - crm_notice("TLS server session ended"); -- /* If we are in the process of shutting down, then we should actually exit. -- * bz#1804259 -- */ -- execd_exit_if_shutting_down(); - return; - } - --- -2.31.1 - diff --git a/SOURCES/003-clone-shuffle.patch b/SOURCES/003-clone-shuffle.patch new file mode 100644 index 0000000..e615705 --- /dev/null +++ b/SOURCES/003-clone-shuffle.patch @@ -0,0 +1,15630 @@ +From eae355ca4c869f7ccf1ad3d1f5ce488375a6f353 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 20 Apr 2021 12:55:45 -0700 +Subject: [PATCH 01/19] Refactor: libpe_status: Add pe__rsc_node flag enum + +This commit adds a new pe__rsc_node flag enum containing values for +assigned, current, and pending. This indicates the criterion used to +look up a resource's location. + +After a compatibility break, native_location() could use these flags +instead of an int. 
+ +Signed-off-by: Reid Wahl +--- + include/crm/pengine/internal.h | 14 ++++++++++++++ + lib/pengine/native.c | 1 + + 2 files changed, 15 insertions(+) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 1b5f6f1d8d9..8fcb9c991f3 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -235,6 +235,19 @@ bool pe_can_fence(const pe_working_set_t *data_set, const pe_node_t *node); + + void add_hash_param(GHashTable * hash, const char *name, const char *value); + ++/*! ++ * \internal ++ * \enum pe__rsc_node ++ * \brief Type of resource location lookup to perform ++ */ ++enum pe__rsc_node { ++ pe__rsc_node_assigned = 0, //!< Where resource is assigned ++ pe__rsc_node_current = 1, //!< Where resource is running ++ ++ // @COMPAT: Use in native_location() at a compatibility break ++ pe__rsc_node_pending = 2, //!< Where resource is pending ++}; ++ + char *native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name, + pe_working_set_t * data_set); + pe_node_t *native_location(const pe_resource_t *rsc, GList **list, int current); +@@ -576,6 +589,7 @@ bool pe__bundle_needs_remote_name(pe_resource_t *rsc); + const char *pe__add_bundle_remote_name(pe_resource_t *rsc, + pe_working_set_t *data_set, + xmlNode *xml, const char *field); ++ + const char *pe_node_attribute_calculated(const pe_node_t *node, + const char *name, + const pe_resource_t *rsc); +diff --git a/lib/pengine/native.c b/lib/pengine/native.c +index 5e92ddcefdf..44d4805ac56 100644 +--- a/lib/pengine/native.c ++++ b/lib/pengine/native.c +@@ -1092,6 +1092,7 @@ native_resource_state(const pe_resource_t * rsc, gboolean current) + pe_node_t * + native_location(const pe_resource_t *rsc, GList **list, int current) + { ++ // @COMPAT: Accept a pe__rsc_node argument instead of int current + pe_node_t *one = NULL; + GList *result = NULL; + + +From 809b9c2ea13e5f32bfa6eecf3482eb257802b92d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: 
Sun, 11 Sep 2022 19:36:07 -0700 +Subject: [PATCH 02/19] Refactor: libpe_status: pe_node_attribute_calculated() + accepts node type + +Use enum pe__rsc_node in pe_node_attribute_calculated() to determine +which container host (assigned or current) to get the attribute value +from. For now, there's no use case for pending. + +Pass pe__rsc_node_current for existing calls, since that maintains the +existing behavior. + +Signed-off-by: Reid Wahl +--- + include/crm/pengine/internal.h | 3 +- + lib/pacemaker/pcmk_sched_location.c | 5 ++- + lib/pacemaker/pcmk_sched_promotable.c | 3 +- + lib/pengine/common.c | 60 ++++++++++++++++++++++----- + 4 files changed, 57 insertions(+), 14 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 8fcb9c991f3..ef8c382f62b 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -592,7 +592,8 @@ const char *pe__add_bundle_remote_name(pe_resource_t *rsc, + + const char *pe_node_attribute_calculated(const pe_node_t *node, + const char *name, +- const pe_resource_t *rsc); ++ const pe_resource_t *rsc, ++ enum pe__rsc_node node_type); + const char *pe_node_attribute_raw(const pe_node_t *node, const char *name); + bool pe__is_universal_clone(const pe_resource_t *rsc, + const pe_working_set_t *data_set); +diff --git a/lib/pacemaker/pcmk_sched_location.c b/lib/pacemaker/pcmk_sched_location.c +index b4ce4ff07dc..5f42ec0fc8c 100644 +--- a/lib/pacemaker/pcmk_sched_location.c ++++ b/lib/pacemaker/pcmk_sched_location.c +@@ -31,7 +31,10 @@ get_node_score(const char *rule, const char *score, bool raw, + score_f = char2score(score); + + } else { +- const char *attr_score = pe_node_attribute_calculated(node, score, rsc); ++ const char *attr_score = NULL; ++ ++ attr_score = pe_node_attribute_calculated(node, score, rsc, ++ pe__rsc_node_current); + + if (attr_score == NULL) { + crm_debug("Rule %s: %s did not have a value for %s", +diff --git a/lib/pacemaker/pcmk_sched_promotable.c 
b/lib/pacemaker/pcmk_sched_promotable.c +index d12d017bab2..2bad1d0c487 100644 +--- a/lib/pacemaker/pcmk_sched_promotable.c ++++ b/lib/pacemaker/pcmk_sched_promotable.c +@@ -649,7 +649,8 @@ promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node, + CRM_CHECK((rsc != NULL) && (node != NULL) && (name != NULL), return NULL); + + attr_name = pcmk_promotion_score_name(name); +- attr_value = pe_node_attribute_calculated(node, attr_name, rsc); ++ attr_value = pe_node_attribute_calculated(node, attr_name, rsc, ++ pe__rsc_node_current); + free(attr_name); + return attr_value; + } +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 6c69bfcb41a..af41c1f6e89 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -516,9 +516,15 @@ add_hash_param(GHashTable * hash, const char *name, const char *value) + + const char * + pe_node_attribute_calculated(const pe_node_t *node, const char *name, +- const pe_resource_t *rsc) ++ const pe_resource_t *rsc, ++ enum pe__rsc_node node_type) + { +- const char *source; ++ const char *source = NULL; ++ const char *node_type_s = NULL; ++ const char *reason = NULL; ++ ++ const pe_resource_t *container = NULL; ++ const pe_node_t *host = NULL; + + if(node == NULL) { + return NULL; +@@ -539,18 +545,50 @@ pe_node_attribute_calculated(const pe_node_t *node, const char *name, + * storage + */ + +- CRM_ASSERT(node->details->remote_rsc); +- CRM_ASSERT(node->details->remote_rsc->container); ++ CRM_ASSERT(node->details->remote_rsc != NULL); ++ ++ container = node->details->remote_rsc->container; ++ CRM_ASSERT(container != NULL); ++ ++ switch (node_type) { ++ case pe__rsc_node_assigned: ++ node_type_s = "assigned"; ++ host = container->allocated_to; ++ if (host == NULL) { ++ reason = "not assigned"; ++ } ++ break; ++ ++ case pe__rsc_node_current: ++ node_type_s = "current"; + +- if(node->details->remote_rsc->container->running_on) { +- pe_node_t *host = node->details->remote_rsc->container->running_on->data; +- 
pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s", +- rsc->id, name, pe__node_name(host)); +- return g_hash_table_lookup(host->details->attrs, name); ++ if (container->running_on != NULL) { ++ host = container->running_on->data; ++ } ++ if (host == NULL) { ++ reason = "inactive"; ++ } ++ break; ++ ++ default: ++ // Add support for other enum pe__rsc_node values if needed ++ CRM_ASSERT(false); ++ break; + } + +- pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive", +- rsc->id, name, node->details->remote_rsc->container->id); ++ if (host != NULL) { ++ const char *value = g_hash_table_lookup(host->details->attrs, name); ++ ++ pe_rsc_trace(rsc, ++ "%s: Value lookup for %s on %s container host %s %s%s", ++ rsc->id, name, node_type_s, pe__node_name(host), ++ ((value != NULL)? "succeeded: " : "failed"), ++ pcmk__s(value, "")); ++ return value; ++ } ++ pe_rsc_trace(rsc, ++ "%s: Not looking for %s on %s container host: %s is %s", ++ rsc->id, name, node_type_s, container->id, reason); + return NULL; + } + + +From d5a56afd2ecd861e0cf0d1049157e82a034f3f7a Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 15 Jun 2023 00:34:39 -0700 +Subject: [PATCH 03/19] Fix: libpacemaker: Get container attr from assigned + node, if any + +promotion_attr_value() should get a container's promotion score from +the host to which it's assigned (if it's been assigned), rather than the +host on which it's running. 
+ +Ref T489 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_promotable.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_promotable.c b/lib/pacemaker/pcmk_sched_promotable.c +index 2bad1d0c487..8612c25a51d 100644 +--- a/lib/pacemaker/pcmk_sched_promotable.c ++++ b/lib/pacemaker/pcmk_sched_promotable.c +@@ -645,12 +645,14 @@ promotion_attr_value(const pe_resource_t *rsc, const pe_node_t *node, + { + char *attr_name = NULL; + const char *attr_value = NULL; ++ enum pe__rsc_node node_type = pe__rsc_node_assigned; + +- CRM_CHECK((rsc != NULL) && (node != NULL) && (name != NULL), return NULL); +- ++ if (pcmk_is_set(rsc->flags, pe_rsc_provisional)) { ++ // Not assigned yet ++ node_type = pe__rsc_node_current; ++ } + attr_name = pcmk_promotion_score_name(name); +- attr_value = pe_node_attribute_calculated(node, attr_name, rsc, +- pe__rsc_node_current); ++ attr_value = pe_node_attribute_calculated(node, attr_name, rsc, node_type); + free(attr_name); + return attr_value; + } + +From cfc2cd20e15c0f1c6b6ed8517c310acd756c1533 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Jul 2023 02:26:26 -0700 +Subject: [PATCH 04/19] Test: scheduler: Update outputs for + promotion_attr_value() fix + +Update outputs after previous commit (get container's promotion score +from assigned host). + +There are a few changes to scores, as well as dot and exp files. The +behavior in the bundle-interleave-start test appears to be an +improvement. 
+ +Ref T489 + +Signed-off-by: Reid Wahl +--- + cts/scheduler/dot/bundle-interleave-start.dot | 44 +- + cts/scheduler/exp/bundle-interleave-start.exp | 556 +++++++++++------- + .../exp/no-promote-on-unrunnable-guest.exp | 14 +- + .../scores/bundle-interleave-start.scores | 12 +- + .../scores/cancel-behind-moving-remote.scores | 2 +- + .../scores/guest-host-not-fenceable.scores | 2 +- + .../no-promote-on-unrunnable-guest.scores | 2 +- + .../summary/bundle-interleave-start.summary | 54 +- + cts/scheduler/xml/bundle-interleave-start.xml | 3 +- + 9 files changed, 445 insertions(+), 244 deletions(-) + +diff --git a/cts/scheduler/dot/bundle-interleave-start.dot b/cts/scheduler/dot/bundle-interleave-start.dot +index bf6ed7f9edb..a513ac5806b 100644 +--- a/cts/scheduler/dot/bundle-interleave-start.dot ++++ b/cts/scheduler/dot/bundle-interleave-start.dot +@@ -41,9 +41,15 @@ + "app-bundle-2_monitor_0 node5" [ style=bold color="green" fontcolor="black"] + "app-bundle-2_monitor_30000 node4" [ style=bold color="green" fontcolor="black"] + "app-bundle-2_start_0 node4" -> "app-bundle-2_monitor_30000 node4" [ style = bold] +-"app-bundle-2_start_0 node4" -> "app:2_monitor_16000 app-bundle-2" [ style = bold] ++"app-bundle-2_start_0 node4" -> "app:2_monitor_15000 app-bundle-2" [ style = bold] ++"app-bundle-2_start_0 node4" -> "app:2_promote_0 app-bundle-2" [ style = bold] + "app-bundle-2_start_0 node4" -> "app:2_start_0 app-bundle-2" [ style = bold] + "app-bundle-2_start_0 node4" [ style=bold color="green" fontcolor="black"] ++"app-bundle-clone_promote_0" -> "app:2_promote_0 app-bundle-2" [ style = bold] ++"app-bundle-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"app-bundle-clone_promoted_0" -> "app-bundle_promoted_0" [ style = bold] ++"app-bundle-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"app-bundle-clone_running_0" -> "app-bundle-clone_promote_0" [ style = bold] + "app-bundle-clone_running_0" -> "app-bundle_running_0" [ style = bold] + 
"app-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "app-bundle-clone_start_0" -> "app-bundle-clone_running_0" [ style = bold] +@@ -133,8 +139,13 @@ + "app-bundle-podman-2_start_0 node4" -> "app-bundle-2_start_0 node4" [ style = bold] + "app-bundle-podman-2_start_0 node4" -> "app-bundle-podman-2_monitor_60000 node4" [ style = bold] + "app-bundle-podman-2_start_0 node4" -> "app-bundle_running_0" [ style = bold] ++"app-bundle-podman-2_start_0 node4" -> "app:2_promote_0 app-bundle-2" [ style = bold] + "app-bundle-podman-2_start_0 node4" -> "app:2_start_0 app-bundle-2" [ style = bold] + "app-bundle-podman-2_start_0 node4" [ style=bold color="green" fontcolor="black"] ++"app-bundle_promote_0" -> "app-bundle-clone_promote_0" [ style = bold] ++"app-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] ++"app-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"app-bundle_running_0" -> "app-bundle_promote_0" [ style = bold] + "app-bundle_running_0" [ style=bold color="green" fontcolor="orange"] + "app-bundle_start_0" -> "app-bundle-clone_start_0" [ style = bold] + "app-bundle_start_0" -> "app-bundle-podman-0_start_0 node2" [ style = bold] +@@ -151,9 +162,13 @@ + "app:1_start_0 app-bundle-1" -> "app:1_monitor_16000 app-bundle-1" [ style = bold] + "app:1_start_0 app-bundle-1" -> "app:2_start_0 app-bundle-2" [ style = bold] + "app:1_start_0 app-bundle-1" [ style=bold color="green" fontcolor="black"] +-"app:2_monitor_16000 app-bundle-2" [ style=bold color="green" fontcolor="black"] ++"app:2_monitor_15000 app-bundle-2" [ style=bold color="green" fontcolor="black"] ++"app:2_promote_0 app-bundle-2" -> "app-bundle-clone_promoted_0" [ style = bold] ++"app:2_promote_0 app-bundle-2" -> "app:2_monitor_15000 app-bundle-2" [ style = bold] ++"app:2_promote_0 app-bundle-2" [ style=bold color="green" fontcolor="black"] + "app:2_start_0 app-bundle-2" -> "app-bundle-clone_running_0" [ style = bold] +-"app:2_start_0 app-bundle-2" -> 
"app:2_monitor_16000 app-bundle-2" [ style = bold] ++"app:2_start_0 app-bundle-2" -> "app:2_monitor_15000 app-bundle-2" [ style = bold] ++"app:2_start_0 app-bundle-2" -> "app:2_promote_0 app-bundle-2" [ style = bold] + "app:2_start_0 app-bundle-2" [ style=bold color="green" fontcolor="black"] + "base-bundle-0_monitor_0 node1" -> "base-bundle-0_start_0 node2" [ style = bold] + "base-bundle-0_monitor_0 node1" [ style=bold color="green" fontcolor="black"] +@@ -197,9 +212,15 @@ + "base-bundle-2_monitor_0 node5" [ style=bold color="green" fontcolor="black"] + "base-bundle-2_monitor_30000 node4" [ style=bold color="green" fontcolor="black"] + "base-bundle-2_start_0 node4" -> "base-bundle-2_monitor_30000 node4" [ style = bold] +-"base-bundle-2_start_0 node4" -> "base:2_monitor_16000 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node4" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node4" -> "base:2_promote_0 base-bundle-2" [ style = bold] + "base-bundle-2_start_0 node4" -> "base:2_start_0 base-bundle-2" [ style = bold] + "base-bundle-2_start_0 node4" [ style=bold color="green" fontcolor="black"] ++"base-bundle-clone_promote_0" -> "base:2_promote_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_promoted_0" -> "base-bundle_promoted_0" [ style = bold] ++"base-bundle-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_running_0" -> "base-bundle-clone_promote_0" [ style = bold] + "base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] + "base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] +@@ -289,9 +310,15 @@ + "base-bundle-podman-2_start_0 node4" -> "base-bundle-2_start_0 node4" [ style = bold] + "base-bundle-podman-2_start_0 node4" -> "base-bundle-podman-2_monitor_60000 node4" [ style = bold] + 
"base-bundle-podman-2_start_0 node4" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node4" -> "base:2_promote_0 base-bundle-2" [ style = bold] + "base-bundle-podman-2_start_0 node4" -> "base:2_start_0 base-bundle-2" [ style = bold] + "base-bundle-podman-2_start_0 node4" [ style=bold color="green" fontcolor="black"] ++"base-bundle_promote_0" -> "base-bundle-clone_promote_0" [ style = bold] ++"base-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_promoted_0" -> "app-bundle_promote_0" [ style = bold] ++"base-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] + "base-bundle_running_0" -> "app-bundle_start_0" [ style = bold] ++"base-bundle_running_0" -> "base-bundle_promote_0" [ style = bold] + "base-bundle_running_0" [ style=bold color="green" fontcolor="orange"] + "base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] + "base-bundle_start_0" -> "base-bundle-podman-0_start_0 node2" [ style = bold] +@@ -310,9 +337,14 @@ + "base:1_start_0 base-bundle-1" -> "base:1_monitor_16000 base-bundle-1" [ style = bold] + "base:1_start_0 base-bundle-1" -> "base:2_start_0 base-bundle-2" [ style = bold] + "base:1_start_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] +-"base:2_monitor_16000 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base:2_monitor_15000 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base:2_promote_0 base-bundle-2" -> "app:2_promote_0 app-bundle-2" [ style = bold] ++"base:2_promote_0 base-bundle-2" -> "base-bundle-clone_promoted_0" [ style = bold] ++"base:2_promote_0 base-bundle-2" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base:2_promote_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] + "base:2_start_0 base-bundle-2" -> "app-bundle-podman-2_start_0 node4" [ style = bold] + "base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] +-"base:2_start_0 base-bundle-2" -> 
"base:2_monitor_16000 base-bundle-2" [ style = bold] ++"base:2_start_0 base-bundle-2" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base:2_start_0 base-bundle-2" -> "base:2_promote_0 base-bundle-2" [ style = bold] + "base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/exp/bundle-interleave-start.exp b/cts/scheduler/exp/bundle-interleave-start.exp +index e676b1bfba9..57e551c487e 100644 +--- a/cts/scheduler/exp/bundle-interleave-start.exp ++++ b/cts/scheduler/exp/bundle-interleave-start.exp +@@ -1,7 +1,7 @@ + + + +- ++ + + + +@@ -11,13 +11,13 @@ + + + +- ++ + + + + + +- ++ + + + +@@ -30,13 +30,13 @@ + + + +- ++ + + + + + +- ++ + + + +@@ -46,13 +46,13 @@ + + + +- ++ + + + + + +- ++ + + + +@@ -65,18 +65,18 @@ + + + +- ++ + + +- ++ + + + + + +- ++ + +- ++ + + + +@@ -84,13 +84,38 @@ + + + +- ++ ++ ++ ++ + + + + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -103,37 +128,64 @@ + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + ++ ++ ++ ++ ++ ++ ++ ++ ++ + +- ++ + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + +- ++ ++ ++ ++ ++ ++ ++ + + + +- ++ + +- ++ + + + +@@ -188,7 +240,7 @@ + + + +- ++ + + + +@@ -201,7 +253,7 @@ + + + +- ++ + + + +@@ -235,7 +287,7 @@ + + + +- ++ + + + +@@ -244,7 +296,7 @@ + + + +- ++ + + + +@@ -253,7 +305,7 @@ + + + +- ++ + + + +@@ -262,7 +314,7 @@ + + + +- ++ + + + +@@ -271,7 +323,7 @@ + + + +- ++ + + + +@@ -280,7 +332,7 @@ + + + +- ++ + + + +@@ -293,7 +345,7 @@ + + + +- ++ + + + +@@ -321,7 +373,7 @@ + + + +- ++ + + + +@@ -334,7 +386,7 @@ + + + +- ++ + + + +@@ -347,7 +399,7 @@ + + + +- ++ + + + +@@ -360,7 +412,7 @@ + + + +- ++ + + + +@@ -373,7 +425,7 @@ + + + +- ++ + + + +@@ -386,7 +438,7 @@ + + + +- ++ + + + +@@ -399,7 +451,7 @@ + + + +- ++ + + + +@@ -433,7 +485,7 @@ + + + +- ++ + + + +@@ -442,7 +494,7 @@ + + + +- ++ + + + +@@ -451,7 +503,7 @@ + + + +- ++ + + + +@@ -460,7 +512,7 @@ + + + +- ++ + + + +@@ -469,7 +521,7 @@ + + + +- ++ + 
+ + +@@ -478,7 +530,7 @@ + + + +- ++ + + + +@@ -491,7 +543,7 @@ + + + +- ++ + + + +@@ -519,7 +571,7 @@ + + + +- ++ + + + +@@ -532,7 +584,7 @@ + + + +- ++ + + + +@@ -545,7 +597,7 @@ + + + +- ++ + + + +@@ -558,7 +610,7 @@ + + + +- ++ + + + +@@ -571,7 +623,7 @@ + + + +- ++ + + + +@@ -584,7 +636,7 @@ + + + +- ++ + + + +@@ -597,7 +649,7 @@ + + + +- ++ + + + +@@ -631,7 +683,7 @@ + + + +- ++ + + + +@@ -640,7 +692,7 @@ + + + +- ++ + + + +@@ -649,7 +701,7 @@ + + + +- ++ + + + +@@ -658,7 +710,7 @@ + + + +- ++ + + + +@@ -667,7 +719,7 @@ + + + +- ++ + + + +@@ -676,7 +728,7 @@ + + + +- ++ + + + +@@ -689,7 +741,7 @@ + + + +- ++ + + + +@@ -717,7 +769,7 @@ + + + +- ++ + + + +@@ -730,7 +782,7 @@ + + + +- ++ + + + +@@ -743,7 +795,7 @@ + + + +- ++ + + + +@@ -756,7 +808,7 @@ + + + +- ++ + + + +@@ -769,7 +821,7 @@ + + + +- ++ + + + +@@ -782,141 +834,196 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + +- ++ ++ ++ ++ + + + +- ++ + +- ++ + + + + + + +- ++ ++ ++ ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + +@@ -967,26 +1074,26 @@ + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1014,14 +1121,14 @@ + + + +- ++ + + +- ++ + + + +- ++ + + + +@@ -1030,7 +1137,7 @@ + + + +- ++ + + + +@@ -1039,7 +1146,7 @@ + + + +- ++ + + + +@@ -1048,7 +1155,7 @@ + + + +- ++ + + + +@@ -1057,7 +1164,7 @@ + + + +- ++ + + + +@@ -1066,22 +1173,22 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1103,11 +1210,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1116,11 +1223,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1129,11 +1236,11 @@ 
+ + + +- ++ + + + +- ++ + + + +@@ -1142,11 +1249,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1155,11 +1262,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1168,26 +1275,26 @@ + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1215,14 +1322,14 @@ + + + +- ++ + + +- ++ + + + +- ++ + + + +@@ -1231,7 +1338,7 @@ + + + +- ++ + + + +@@ -1240,7 +1347,7 @@ + + + +- ++ + + + +@@ -1249,7 +1356,7 @@ + + + +- ++ + + + +@@ -1258,7 +1365,7 @@ + + + +- ++ + + + +@@ -1267,22 +1374,22 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1304,11 +1411,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1317,11 +1424,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1330,11 +1437,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1343,11 +1450,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1356,11 +1463,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1369,26 +1476,26 @@ + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1416,14 +1523,14 @@ + + + +- ++ + + +- ++ + + + +- ++ + + + +@@ -1432,7 +1539,7 @@ + + + +- ++ + + + +@@ -1441,7 +1548,7 @@ + + + +- ++ + + + +@@ -1450,7 +1557,7 @@ + + + +- ++ + + + +@@ -1459,7 +1566,7 @@ + + + +- ++ + + + +@@ -1468,22 +1575,22 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + +@@ -1505,11 +1612,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1518,11 +1625,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1531,11 +1638,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1544,11 +1651,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1557,11 +1664,11 @@ + + + +- ++ + + + +- ++ + + + +@@ -1570,34 +1677,61 @@ + + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + ++ ++ ++ ++ ++ ++ ++ ++ ++ + +- ++ + + +- ++ ++ ++ ++ ++ ++ ++ + + + +- ++ + +- ++ + + + +@@ -1607,7 +1741,31 @@ + + + +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -1624,11 +1782,11 @@ + + + +- ++ + + + +- ++ + + + +diff --git a/cts/scheduler/exp/no-promote-on-unrunnable-guest.exp 
b/cts/scheduler/exp/no-promote-on-unrunnable-guest.exp +index 351aec11df0..350495f4a6f 100644 +--- a/cts/scheduler/exp/no-promote-on-unrunnable-guest.exp ++++ b/cts/scheduler/exp/no-promote-on-unrunnable-guest.exp +@@ -14,7 +14,7 @@ + + + +- ++ + + + +@@ -82,14 +82,14 @@ + + + +- ++ + + + + + + +- ++ + + + +@@ -101,7 +101,7 @@ + + + +- ++ + + + +@@ -250,7 +250,7 @@ + + + +- ++ + + + +@@ -396,7 +396,7 @@ + + + +- ++ + + + +@@ -473,7 +473,7 @@ + + + +- ++ + + + +diff --git a/cts/scheduler/scores/bundle-interleave-start.scores b/cts/scheduler/scores/bundle-interleave-start.scores +index 7f4a370474d..b3aa9b571e8 100644 +--- a/cts/scheduler/scores/bundle-interleave-start.scores ++++ b/cts/scheduler/scores/bundle-interleave-start.scores +@@ -1,10 +1,10 @@ + +-app:0 promotion score on app-bundle-0: -1 +-app:1 promotion score on app-bundle-1: -1 +-app:2 promotion score on app-bundle-2: -1 +-base:0 promotion score on base-bundle-0: -1 +-base:1 promotion score on base-bundle-1: -1 +-base:2 promotion score on base-bundle-2: -1 ++app:0 promotion score on app-bundle-0: 12 ++app:1 promotion score on app-bundle-1: 13 ++app:2 promotion score on app-bundle-2: 14 ++base:0 promotion score on base-bundle-0: 12 ++base:1 promotion score on base-bundle-1: 13 ++base:2 promotion score on base-bundle-2: 14 + pcmk__bundle_allocate: app-bundle allocation score on node1: 0 + pcmk__bundle_allocate: app-bundle allocation score on node2: 0 + pcmk__bundle_allocate: app-bundle allocation score on node3: 0 +diff --git a/cts/scheduler/scores/cancel-behind-moving-remote.scores b/cts/scheduler/scores/cancel-behind-moving-remote.scores +index 0dfd78caa92..0e11b225aea 100644 +--- a/cts/scheduler/scores/cancel-behind-moving-remote.scores ++++ b/cts/scheduler/scores/cancel-behind-moving-remote.scores +@@ -2,7 +2,7 @@ + galera:0 promotion score on galera-bundle-0: 100 + galera:1 promotion score on galera-bundle-1: 100 + galera:2 promotion score on galera-bundle-2: 100 +-ovndb_servers:0 promotion score on 
ovn-dbs-bundle-0: -1 ++ovndb_servers:0 promotion score on ovn-dbs-bundle-0: 5 + ovndb_servers:1 promotion score on ovn-dbs-bundle-1: 5 + ovndb_servers:2 promotion score on ovn-dbs-bundle-2: 5 + pcmk__bundle_allocate: galera-bundle allocation score on compute-0: -INFINITY +diff --git a/cts/scheduler/scores/guest-host-not-fenceable.scores b/cts/scheduler/scores/guest-host-not-fenceable.scores +index e4c7fc2033d..5f43bcb0812 100644 +--- a/cts/scheduler/scores/guest-host-not-fenceable.scores ++++ b/cts/scheduler/scores/guest-host-not-fenceable.scores +@@ -1,6 +1,6 @@ + + galera:0 promotion score on galera-bundle-0: 100 +-galera:1 promotion score on galera-bundle-1: 100 ++galera:1 promotion score on galera-bundle-1: -1 + galera:2 promotion score on galera-bundle-2: -1 + pcmk__bundle_allocate: galera-bundle allocation score on node1: 0 + pcmk__bundle_allocate: galera-bundle allocation score on node2: 0 +diff --git a/cts/scheduler/scores/no-promote-on-unrunnable-guest.scores b/cts/scheduler/scores/no-promote-on-unrunnable-guest.scores +index 7923cdc2320..9362dc0e1f2 100644 +--- a/cts/scheduler/scores/no-promote-on-unrunnable-guest.scores ++++ b/cts/scheduler/scores/no-promote-on-unrunnable-guest.scores +@@ -2,7 +2,7 @@ + galera:0 promotion score on galera-bundle-0: 100 + galera:1 promotion score on galera-bundle-1: 100 + galera:2 promotion score on galera-bundle-2: 100 +-ovndb_servers:0 promotion score on ovn-dbs-bundle-0: 5 ++ovndb_servers:0 promotion score on ovn-dbs-bundle-0: -1 + ovndb_servers:1 promotion score on ovn-dbs-bundle-1: 5 + ovndb_servers:2 promotion score on ovn-dbs-bundle-2: 5 + pcmk__bundle_allocate: galera-bundle allocation score on controller-0: 0 +diff --git a/cts/scheduler/summary/bundle-interleave-start.summary b/cts/scheduler/summary/bundle-interleave-start.summary +index 1648e929bf7..07ff7561968 100644 +--- a/cts/scheduler/summary/bundle-interleave-start.summary ++++ b/cts/scheduler/summary/bundle-interleave-start.summary +@@ -14,24 +14,24 @@ 
Current cluster status: + * app-bundle-2 (ocf:pacemaker:Stateful): Stopped + + Transition Summary: +- * Start base-bundle-podman-0 ( node2 ) +- * Start base-bundle-0 ( node2 ) +- * Start base:0 ( base-bundle-0 ) +- * Start base-bundle-podman-1 ( node3 ) +- * Start base-bundle-1 ( node3 ) +- * Start base:1 ( base-bundle-1 ) +- * Start base-bundle-podman-2 ( node4 ) +- * Start base-bundle-2 ( node4 ) +- * Start base:2 ( base-bundle-2 ) +- * Start app-bundle-podman-0 ( node2 ) +- * Start app-bundle-0 ( node2 ) +- * Start app:0 ( app-bundle-0 ) +- * Start app-bundle-podman-1 ( node3 ) +- * Start app-bundle-1 ( node3 ) +- * Start app:1 ( app-bundle-1 ) +- * Start app-bundle-podman-2 ( node4 ) +- * Start app-bundle-2 ( node4 ) +- * Start app:2 ( app-bundle-2 ) ++ * Start base-bundle-podman-0 ( node2 ) ++ * Start base-bundle-0 ( node2 ) ++ * Start base:0 ( base-bundle-0 ) ++ * Start base-bundle-podman-1 ( node3 ) ++ * Start base-bundle-1 ( node3 ) ++ * Start base:1 ( base-bundle-1 ) ++ * Start base-bundle-podman-2 ( node4 ) ++ * Start base-bundle-2 ( node4 ) ++ * Promote base:2 ( Stopped -> Promoted base-bundle-2 ) ++ * Start app-bundle-podman-0 ( node2 ) ++ * Start app-bundle-0 ( node2 ) ++ * Start app:0 ( app-bundle-0 ) ++ * Start app-bundle-podman-1 ( node3 ) ++ * Start app-bundle-1 ( node3 ) ++ * Start app:1 ( app-bundle-1 ) ++ * Start app-bundle-podman-2 ( node4 ) ++ * Start app-bundle-2 ( node4 ) ++ * Promote app:2 ( Stopped -> Promoted app-bundle-2 ) + + Executing Cluster Transition: + * Resource action: base-bundle-podman-0 monitor on node5 +@@ -100,8 +100,9 @@ Executing Cluster Transition: + * Pseudo action: base-bundle_running_0 + * Resource action: base:0 monitor=16000 on base-bundle-0 + * Resource action: base:1 monitor=16000 on base-bundle-1 +- * Resource action: base:2 monitor=16000 on base-bundle-2 + * Pseudo action: app-bundle_start_0 ++ * Pseudo action: base-bundle_promote_0 ++ * Pseudo action: base-bundle-clone_promote_0 + * Pseudo action: 
app-bundle-clone_start_0 + * Resource action: app-bundle-podman-0 start on node2 + * Resource action: app-bundle-0 monitor on node5 +@@ -121,12 +122,16 @@ Executing Cluster Transition: + * Resource action: app-bundle-2 monitor on node3 + * Resource action: app-bundle-2 monitor on node2 + * Resource action: app-bundle-2 monitor on node1 ++ * Resource action: base:2 promote on base-bundle-2 ++ * Pseudo action: base-bundle-clone_promoted_0 + * Resource action: app-bundle-podman-0 monitor=60000 on node2 + * Resource action: app-bundle-0 start on node2 + * Resource action: app-bundle-podman-1 monitor=60000 on node3 + * Resource action: app-bundle-1 start on node3 + * Resource action: app-bundle-podman-2 monitor=60000 on node4 + * Resource action: app-bundle-2 start on node4 ++ * Pseudo action: base-bundle_promoted_0 ++ * Resource action: base:2 monitor=15000 on base-bundle-2 + * Resource action: app:0 start on app-bundle-0 + * Resource action: app:1 start on app-bundle-1 + * Resource action: app:2 start on app-bundle-2 +@@ -137,7 +142,12 @@ Executing Cluster Transition: + * Pseudo action: app-bundle_running_0 + * Resource action: app:0 monitor=16000 on app-bundle-0 + * Resource action: app:1 monitor=16000 on app-bundle-1 +- * Resource action: app:2 monitor=16000 on app-bundle-2 ++ * Pseudo action: app-bundle_promote_0 ++ * Pseudo action: app-bundle-clone_promote_0 ++ * Resource action: app:2 promote on app-bundle-2 ++ * Pseudo action: app-bundle-clone_promoted_0 ++ * Pseudo action: app-bundle_promoted_0 ++ * Resource action: app:2 monitor=15000 on app-bundle-2 + + Revised Cluster Status: + * Node List: +@@ -149,8 +159,8 @@ Revised Cluster Status: + * Container bundle set: base-bundle [localhost/pcmktest:base]: + * base-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node2 + * base-bundle-1 (ocf:pacemaker:Stateful): Unpromoted node3 +- * base-bundle-2 (ocf:pacemaker:Stateful): Unpromoted node4 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Promoted node4 + * Container bundle 
set: app-bundle [localhost/pcmktest:app]: + * app-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node2 + * app-bundle-1 (ocf:pacemaker:Stateful): Unpromoted node3 +- * app-bundle-2 (ocf:pacemaker:Stateful): Unpromoted node4 ++ * app-bundle-2 (ocf:pacemaker:Stateful): Promoted node4 +diff --git a/cts/scheduler/xml/bundle-interleave-start.xml b/cts/scheduler/xml/bundle-interleave-start.xml +index e8630cdf054..facb181b2a4 100644 +--- a/cts/scheduler/xml/bundle-interleave-start.xml ++++ b/cts/scheduler/xml/bundle-interleave-start.xml +@@ -6,7 +6,8 @@ + and its promoted role is colocated with base's. App's starts and + promotes are ordered after base's. + +- In this test, all are stopped and must be started. ++ In this test, all are stopped and must be started. One replica of each ++ bundle must be promoted. + --> + + + +From 6e5bc0d119c1609a3228763a5116a68829870948 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 26 Jun 2023 12:42:10 -0700 +Subject: [PATCH 05/19] Refactor: libpacemaker: De-functionize + pcmk__finalize_assignment() + +Move it into pcmk__assign_resource(). Also correct the "was assignment +changed" logic, and allocate rc_stopped only once. 
+ +Signed-off-by: Reid Wahl +--- + lib/pacemaker/libpacemaker_private.h | 4 - + lib/pacemaker/pcmk_sched_primitive.c | 9 +- + lib/pacemaker/pcmk_sched_resource.c | 156 ++++++++++++--------------- + 3 files changed, 74 insertions(+), 95 deletions(-) + +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index 192d5a703ff..614d695f83f 100644 +--- a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -908,10 +908,6 @@ void pcmk__noop_add_graph_meta(const pe_resource_t *rsc, xmlNode *xml); + G_GNUC_INTERNAL + void pcmk__output_resource_actions(pe_resource_t *rsc); + +-G_GNUC_INTERNAL +-bool pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, +- bool force); +- + G_GNUC_INTERNAL + bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force); + +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index aefbf9aa140..2470b08ed69 100644 +--- a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -152,7 +152,6 @@ assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer) + GList *nodes = NULL; + pe_node_t *chosen = NULL; + pe_node_t *best = NULL; +- bool result = false; + const pe_node_t *most_free_node = pcmk__ban_insufficient_capacity(rsc); + + if (prefer == NULL) { +@@ -260,9 +259,9 @@ assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer) + pe__node_name(chosen), rsc->id, g_list_length(nodes)); + } + +- result = pcmk__finalize_assignment(rsc, chosen, false); ++ pcmk__assign_resource(rsc, chosen, false); + g_list_free(nodes); +- return result; ++ return rsc->allocated_to != NULL; + } + + /*! +@@ -475,11 +474,11 @@ pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer) + } + pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, + (assign_to? 
assign_to->details->uname : "no node"), reason); +- pcmk__finalize_assignment(rsc, assign_to, true); ++ pcmk__assign_resource(rsc, assign_to, true); + + } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) { + pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id); +- pcmk__finalize_assignment(rsc, NULL, true); ++ pcmk__assign_resource(rsc, NULL, true); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) + && assign_best_node(rsc, prefer)) { +diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c +index b8554998197..dd9939a42a6 100644 +--- a/lib/pacemaker/pcmk_sched_resource.c ++++ b/lib/pacemaker/pcmk_sched_resource.c +@@ -331,140 +331,124 @@ pcmk__output_resource_actions(pe_resource_t *rsc) + + /*! + * \internal +- * \brief Assign a specified primitive resource to a node ++ * \brief Assign a specified resource (of any variant) to a node + * +- * Assign a specified primitive resource to a specified node, if the node can +- * run the resource (or unconditionally, if \p force is true). Mark the resource +- * as no longer provisional. If the primitive can't be assigned (or \p chosen is +- * NULL), unassign any previous assignment for it, set its next role to stopped, +- * and update any existing actions scheduled for it. This is not done +- * recursively for children, so it should be called only for primitives. ++ * Assign a specified resource and its children (if any) to a specified node, if ++ * the node can run the resource (or unconditionally, if \p force is true). Mark ++ * the resources as no longer provisional. If a resource can't be assigned (or ++ * \p node is \c NULL), unassign any previous assignment, set next role to ++ * stopped, and update any existing actions scheduled for it. 
+ * +- * \param[in,out] rsc Resource to assign +- * \param[in,out] chosen Node to assign \p rsc to +- * \param[in] force If true, assign to \p chosen even if unavailable ++ * \param[in,out] rsc Resource to assign ++ * \param[in,out] node Node to assign \p rsc to ++ * \param[in] force If true, assign to \p node even if unavailable + * +- * \return true if \p rsc could be assigned, otherwise false ++ * \return \c true if the assignment of \p rsc changed, or \c false otherwise + * + * \note Assigning a resource to the NULL node using this function is different + * from calling pcmk__unassign_resource(), in that it will also update any + * actions created for the resource. ++ * \note The \c resource_alloc_functions_t:assign() method is preferred, unless ++ * a resource should be assigned to the \c NULL node or every resource in ++ * a tree should be assigned to the same node. + */ + bool +-pcmk__finalize_assignment(pe_resource_t *rsc, pe_node_t *chosen, bool force) ++pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) + { +- pcmk__output_t *out = rsc->cluster->priv; ++ bool changed = false; ++ ++ CRM_ASSERT(rsc != NULL); + +- CRM_ASSERT(rsc->variant == pe_native); +- +- if (!force && (chosen != NULL)) { +- if ((chosen->weight < 0) +- // Allow the graph to assume that guest node connections will come up +- || (!pcmk__node_available(chosen, true, false) +- && !pe__is_guest_node(chosen))) { +- +- crm_debug("All nodes for resource %s are unavailable, unclean or " +- "shutting down (%s can%s run resources, with weight %d)", +- rsc->id, pe__node_name(chosen), +- (pcmk__node_available(chosen, true, false)? 
"" : "not"), +- chosen->weight); +- pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); +- chosen = NULL; ++ if (rsc->children != NULL) { ++ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ pe_resource_t *child_rsc = iter->data; ++ ++ changed |= pcmk__assign_resource(child_rsc, node, force); + } ++ return changed; + } + ++ // Assigning a primitive ++ ++ if (!force && (node != NULL) ++ && ((node->weight < 0) ++ // Allow graph to assume that guest node connections will come up ++ || (!pcmk__node_available(node, true, false) ++ && !pe__is_guest_node(node)))) { ++ ++ pe_rsc_debug(rsc, ++ "All nodes for resource %s are unavailable, unclean or " ++ "shutting down (%s can%s run resources, with score %s)", ++ rsc->id, pe__node_name(node), ++ (pcmk__node_available(node, true, false)? "" : "not"), ++ pcmk_readable_score(node->weight)); ++ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); ++ node = NULL; ++ } ++ ++ if (rsc->allocated_to != NULL) { ++ changed = !pe__same_node(rsc->allocated_to, node); ++ } else { ++ changed = (node != NULL); ++ } + pcmk__unassign_resource(rsc); + pe__clear_resource_flags(rsc, pe_rsc_provisional); + +- if (chosen == NULL) { +- crm_debug("Could not allocate a node for %s", rsc->id); +- pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to allocate"); ++ if (node == NULL) { ++ char *rc_stopped = NULL; ++ ++ pe_rsc_debug(rsc, "Could not assign %s to a node", rsc->id); ++ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to assign"); + + for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { + pe_action_t *op = (pe_action_t *) iter->data; + +- crm_debug("Updating %s for allocation failure", op->uuid); ++ pe_rsc_debug(rsc, "Updating %s for %s assignment failure", ++ op->uuid, rsc->id); + + if (pcmk__str_eq(op->task, RSC_STOP, pcmk__str_casei)) { + pe__clear_action_flags(op, pe_action_optional); + + } else if (pcmk__str_eq(op->task, RSC_START, pcmk__str_casei)) { + pe__clear_action_flags(op, 
pe_action_runnable); +- //pe__set_resource_flags(rsc, pe_rsc_block); + + } else { + // Cancel recurring actions, unless for stopped state + const char *interval_ms_s = NULL; + const char *target_rc_s = NULL; +- char *rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); + + interval_ms_s = g_hash_table_lookup(op->meta, + XML_LRM_ATTR_INTERVAL_MS); + target_rc_s = g_hash_table_lookup(op->meta, + XML_ATTR_TE_TARGET_RC); +- if ((interval_ms_s != NULL) +- && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_none) ++ if (rc_stopped == NULL) { ++ rc_stopped = pcmk__itoa(PCMK_OCF_NOT_RUNNING); ++ } ++ ++ if (!pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches) + && !pcmk__str_eq(rc_stopped, target_rc_s, pcmk__str_none)) { ++ + pe__clear_action_flags(op, pe_action_runnable); + } +- free(rc_stopped); + } + } +- return false; ++ free(rc_stopped); ++ return changed; + } + +- crm_debug("Assigning %s to %s", rsc->id, pe__node_name(chosen)); +- rsc->allocated_to = pe__copy_node(chosen); ++ pe_rsc_debug(rsc, "Assigning %s to %s", rsc->id, pe__node_name(node)); ++ rsc->allocated_to = pe__copy_node(node); + +- chosen->details->allocated_rsc = g_list_prepend(chosen->details->allocated_rsc, +- rsc); +- chosen->details->num_resources++; +- chosen->count++; +- pcmk__consume_node_capacity(chosen->details->utilization, rsc); ++ node->details->allocated_rsc = g_list_prepend(node->details->allocated_rsc, ++ rsc); ++ node->details->num_resources++; ++ node->count++; ++ pcmk__consume_node_capacity(node->details->utilization, rsc); + + if (pcmk_is_set(rsc->cluster->flags, pe_flag_show_utilization)) { +- out->message(out, "resource-util", rsc, chosen, __func__); +- } +- return true; +-} +- +-/*! +- * \internal +- * \brief Assign a specified resource (of any variant) to a node +- * +- * Assign a specified resource and its children (if any) to a specified node, if +- * the node can run the resource (or unconditionally, if \p force is true). Mark +- * the resources as no longer provisional. 
If the resources can't be assigned +- * (or \p chosen is NULL), unassign any previous assignments, set next role to +- * stopped, and update any existing actions scheduled for them. +- * +- * \param[in,out] rsc Resource to assign +- * \param[in,out] chosen Node to assign \p rsc to +- * \param[in] force If true, assign to \p chosen even if unavailable +- * +- * \return true if \p rsc could be assigned, otherwise false +- * +- * \note Assigning a resource to the NULL node using this function is different +- * from calling pcmk__unassign_resource(), in that it will also update any +- * actions created for the resource. +- */ +-bool +-pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) +-{ +- bool changed = false; +- +- if (rsc->children == NULL) { +- if (rsc->allocated_to != NULL) { +- changed = true; +- } +- pcmk__finalize_assignment(rsc, node, force); +- +- } else { +- for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { +- pe_resource_t *child_rsc = (pe_resource_t *) iter->data; ++ pcmk__output_t *out = rsc->cluster->priv; + +- changed |= pcmk__assign_resource(child_rsc, node, force); +- } ++ out->message(out, "resource-util", rsc, node, __func__); + } + return changed; + } + +From b01ecf9444e856227cd61c53f1c0106936eccd74 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Jul 2023 02:28:54 -0700 +Subject: [PATCH 06/19] Test: cts-cli: Update tests after defunctionization + +pcmk__finalize_assignment() -> pcmk__assign_resource() + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.tools.exp | 42 ++++++++++++++++++------------------ + 1 file changed, 21 insertions(+), 21 deletions(-) + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index a8e2236063c..506e9ba01b6 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -5711,26 +5711,26 @@ Original: cluster02 capacity: + Original: httpd-bundle-0 capacity: + Original: httpd-bundle-1 capacity: + Original: httpd-bundle-2 
capacity: +-pcmk__finalize_assignment: ping:0 utilization on cluster02: +-pcmk__finalize_assignment: ping:1 utilization on cluster01: +-pcmk__finalize_assignment: Fencing utilization on cluster01: +-pcmk__finalize_assignment: dummy utilization on cluster02: +-pcmk__finalize_assignment: httpd-bundle-docker-0 utilization on cluster01: +-pcmk__finalize_assignment: httpd-bundle-docker-1 utilization on cluster02: +-pcmk__finalize_assignment: httpd-bundle-ip-192.168.122.131 utilization on cluster01: +-pcmk__finalize_assignment: httpd-bundle-0 utilization on cluster01: +-pcmk__finalize_assignment: httpd:0 utilization on httpd-bundle-0: +-pcmk__finalize_assignment: httpd-bundle-ip-192.168.122.132 utilization on cluster02: +-pcmk__finalize_assignment: httpd-bundle-1 utilization on cluster02: +-pcmk__finalize_assignment: httpd:1 utilization on httpd-bundle-1: +-pcmk__finalize_assignment: httpd-bundle-2 utilization on cluster01: +-pcmk__finalize_assignment: httpd:2 utilization on httpd-bundle-2: +-pcmk__finalize_assignment: Public-IP utilization on cluster02: +-pcmk__finalize_assignment: Email utilization on cluster02: +-pcmk__finalize_assignment: mysql-proxy:0 utilization on cluster02: +-pcmk__finalize_assignment: mysql-proxy:1 utilization on cluster01: +-pcmk__finalize_assignment: promotable-rsc:0 utilization on cluster02: +-pcmk__finalize_assignment: promotable-rsc:1 utilization on cluster01: ++pcmk__assign_resource: ping:0 utilization on cluster02: ++pcmk__assign_resource: ping:1 utilization on cluster01: ++pcmk__assign_resource: Fencing utilization on cluster01: ++pcmk__assign_resource: dummy utilization on cluster02: ++pcmk__assign_resource: httpd-bundle-docker-0 utilization on cluster01: ++pcmk__assign_resource: httpd-bundle-docker-1 utilization on cluster02: ++pcmk__assign_resource: httpd-bundle-ip-192.168.122.131 utilization on cluster01: ++pcmk__assign_resource: httpd-bundle-0 utilization on cluster01: ++pcmk__assign_resource: httpd:0 utilization on httpd-bundle-0: 
++pcmk__assign_resource: httpd-bundle-ip-192.168.122.132 utilization on cluster02: ++pcmk__assign_resource: httpd-bundle-1 utilization on cluster02: ++pcmk__assign_resource: httpd:1 utilization on httpd-bundle-1: ++pcmk__assign_resource: httpd-bundle-2 utilization on cluster01: ++pcmk__assign_resource: httpd:2 utilization on httpd-bundle-2: ++pcmk__assign_resource: Public-IP utilization on cluster02: ++pcmk__assign_resource: Email utilization on cluster02: ++pcmk__assign_resource: mysql-proxy:0 utilization on cluster02: ++pcmk__assign_resource: mysql-proxy:1 utilization on cluster01: ++pcmk__assign_resource: promotable-rsc:0 utilization on cluster02: ++pcmk__assign_resource: promotable-rsc:1 utilization on cluster01: + Remaining: cluster01 capacity: + Remaining: cluster02 capacity: + Remaining: httpd-bundle-0 capacity: +@@ -5961,7 +5961,7 @@ Transition Summary: + * Move Public-IP ( cluster02 -> cluster01 ) + * Move Email ( cluster02 -> cluster01 ) + * Stop mysql-proxy:0 ( cluster02 ) due to node availability +- * Stop promotable-rsc:0 ( Promoted cluster02 ) due to node availability ++ * Stop promotable-rsc:0 ( Promoted cluster02 ) due to node availability + + Executing Cluster Transition: + * Pseudo action: httpd-bundle-1_stop_0 + +From 0ad4a3c8404d57e2026e41a234a9b8a0a237b2bd Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 20 Jun 2023 23:22:54 -0700 +Subject: [PATCH 07/19] Test: scheduler: Clone instances should not shuffle + unnecessarily + +In some cases, clone instances may be shuffled when a new instance is +scheduled to start or promote. This can cause instances to be stopped +and started unnecessarily. + +Here we add tests for three types of clones: +* "Bare" clones of primitives +* Clones of groups +* Clones of primitives within bundles (clone is bundle child resource) + +For each clone type, we add four tests. In each test, no clone instance +is running on node 1, and a new instance should be started and possibly +promoted there. 
+* No constraints or stickiness +* Location constraint preferring node 1 +* Promotable clone where node 1 has the highest promotion score +* Promotable clone where node 1 does not have the highest promotion + score + +The following tests are currently incorrect: +* clone-recover-no-shuffle-4 (shuffling) +* clone-recover-no-shuffle-5 (all instances of an anonymous clone move + to one node) +* clone-recover-no-shuffle-6 (shuffling) +* clone-recover-no-shuffle-7 (shuffling) + +Ref T489 + +Signed-off-by: Reid Wahl +--- + cts/cts-scheduler.in | 33 ++ + .../dot/clone-recover-no-shuffle-1.dot | 10 + + .../dot/clone-recover-no-shuffle-10.dot | 10 + + .../dot/clone-recover-no-shuffle-11.dot | 21 + + .../dot/clone-recover-no-shuffle-12.dot | 35 ++ + .../dot/clone-recover-no-shuffle-2.dot | 21 + + .../dot/clone-recover-no-shuffle-3.dot | 32 ++ + .../dot/clone-recover-no-shuffle-4.dot | 23 + + .../dot/clone-recover-no-shuffle-5.dot | 80 +++ + .../dot/clone-recover-no-shuffle-6.dot | 97 ++++ + .../dot/clone-recover-no-shuffle-7.dot | 45 ++ + .../dot/clone-recover-no-shuffle-8.dot | 63 +++ + .../dot/clone-recover-no-shuffle-9.dot | 69 +++ + .../exp/clone-recover-no-shuffle-1.exp | 51 ++ + .../exp/clone-recover-no-shuffle-10.exp | 51 ++ + .../exp/clone-recover-no-shuffle-11.exp | 110 ++++ + .../exp/clone-recover-no-shuffle-12.exp | 187 +++++++ + .../exp/clone-recover-no-shuffle-2.exp | 110 ++++ + .../exp/clone-recover-no-shuffle-3.exp | 171 ++++++ + .../exp/clone-recover-no-shuffle-4.exp | 123 +++++ + .../exp/clone-recover-no-shuffle-5.exp | 452 ++++++++++++++++ + .../exp/clone-recover-no-shuffle-6.exp | 507 ++++++++++++++++++ + .../exp/clone-recover-no-shuffle-7.exp | 240 +++++++++ + .../exp/clone-recover-no-shuffle-8.exp | 338 ++++++++++++ + .../exp/clone-recover-no-shuffle-9.exp | 364 +++++++++++++ + .../scores/clone-recover-no-shuffle-1.scores | 25 + + .../scores/clone-recover-no-shuffle-10.scores | 31 ++ + .../scores/clone-recover-no-shuffle-11.scores | 82 +++ + 
.../scores/clone-recover-no-shuffle-12.scores | 67 +++ + .../scores/clone-recover-no-shuffle-2.scores | 79 +++ + .../scores/clone-recover-no-shuffle-3.scores | 64 +++ + .../scores/clone-recover-no-shuffle-4.scores | 31 ++ + .../scores/clone-recover-no-shuffle-5.scores | 79 +++ + .../scores/clone-recover-no-shuffle-6.scores | 70 +++ + .../scores/clone-recover-no-shuffle-7.scores | 34 ++ + .../scores/clone-recover-no-shuffle-8.scores | 82 +++ + .../scores/clone-recover-no-shuffle-9.scores | 67 +++ + .../clone-recover-no-shuffle-1.summary | 29 + + .../clone-recover-no-shuffle-10.summary | 29 + + .../clone-recover-no-shuffle-11.summary | 34 ++ + .../clone-recover-no-shuffle-12.summary | 43 ++ + .../clone-recover-no-shuffle-2.summary | 32 ++ + .../clone-recover-no-shuffle-3.summary | 42 ++ + .../clone-recover-no-shuffle-4.summary | 35 ++ + .../clone-recover-no-shuffle-5.summary | 59 ++ + .../clone-recover-no-shuffle-6.summary | 68 +++ + .../clone-recover-no-shuffle-7.summary | 44 ++ + .../clone-recover-no-shuffle-8.summary | 52 ++ + .../clone-recover-no-shuffle-9.summary | 56 ++ + .../xml/clone-recover-no-shuffle-1.xml | 113 ++++ + .../xml/clone-recover-no-shuffle-10.xml | 120 +++++ + .../xml/clone-recover-no-shuffle-11.xml | 153 ++++++ + .../xml/clone-recover-no-shuffle-12.xml | 186 +++++++ + .../xml/clone-recover-no-shuffle-2.xml | 141 +++++ + .../xml/clone-recover-no-shuffle-3.xml | 180 +++++++ + .../xml/clone-recover-no-shuffle-4.xml | 120 +++++ + .../xml/clone-recover-no-shuffle-5.xml | 148 +++++ + .../xml/clone-recover-no-shuffle-6.xml | 187 +++++++ + .../xml/clone-recover-no-shuffle-7.xml | 125 +++++ + .../xml/clone-recover-no-shuffle-8.xml | 153 ++++++ + .../xml/clone-recover-no-shuffle-9.xml | 186 +++++++ + 61 files changed, 6289 insertions(+) + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-1.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-10.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-11.dot + create 
mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-12.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-2.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-3.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-4.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-5.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-6.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-7.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-8.dot + create mode 100644 cts/scheduler/dot/clone-recover-no-shuffle-9.dot + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-1.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-10.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-11.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-12.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-2.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-3.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-4.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-5.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-6.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-7.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-8.exp + create mode 100644 cts/scheduler/exp/clone-recover-no-shuffle-9.exp + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-1.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-10.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-11.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-12.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-2.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-3.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-4.scores 
+ create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-5.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-6.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-7.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-8.scores + create mode 100644 cts/scheduler/scores/clone-recover-no-shuffle-9.scores + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-1.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-10.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-11.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-12.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-2.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-3.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-4.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-5.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-6.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-7.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-8.summary + create mode 100644 cts/scheduler/summary/clone-recover-no-shuffle-9.summary + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-1.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-10.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-11.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-12.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-2.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-3.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-4.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-5.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-6.xml + create mode 100644 
cts/scheduler/xml/clone-recover-no-shuffle-7.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-8.xml + create mode 100644 cts/scheduler/xml/clone-recover-no-shuffle-9.xml + +diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in +index ee0cb7b4722..4ff035c23a3 100644 +--- a/cts/cts-scheduler.in ++++ b/cts/cts-scheduler.in +@@ -441,6 +441,39 @@ TESTS = [ + [ "cloned-group", "Make sure only the correct number of cloned groups are started" ], + [ "cloned-group-stop", "Ensure stopping qpidd also stops glance and cinder" ], + [ "clone-no-shuffle", "Don't prioritize allocation of instances that must be moved" ], ++ [ "clone-recover-no-shuffle-1", ++ "Don't shuffle instances when starting a new primitive instance" ], ++ [ "clone-recover-no-shuffle-2", ++ "Don't shuffle instances when starting a new group instance" ], ++ [ "clone-recover-no-shuffle-3", ++ "Don't shuffle instances when starting a new bundle instance" ], ++ [ "clone-recover-no-shuffle-4", ++ "Don't shuffle instances when starting a new primitive instance with " ++ "location preference "], ++ [ "clone-recover-no-shuffle-5", ++ "Don't shuffle instances when starting a new group instance with " ++ "location preference" ], ++ [ "clone-recover-no-shuffle-6", ++ "Don't shuffle instances when starting a new bundle instance with " ++ "location preference" ], ++ [ "clone-recover-no-shuffle-7", ++ "Don't shuffle instances when starting a new primitive instance that " ++ "will be promoted" ], ++ [ "clone-recover-no-shuffle-8", ++ "Don't shuffle instances when starting a new group instance that " ++ "will be promoted " ], ++ [ "clone-recover-no-shuffle-9", ++ "Don't shuffle instances when starting a new bundle instance that " ++ "will be promoted " ], ++ [ "clone-recover-no-shuffle-10", ++ "Don't shuffle instances when starting a new primitive instance that " ++ "won't be promoted" ], ++ [ "clone-recover-no-shuffle-11", ++ "Don't shuffle instances when starting a new group instance that " ++ "won't 
be promoted " ], ++ [ "clone-recover-no-shuffle-12", ++ "Don't shuffle instances when starting a new bundle instance that " ++ "won't be promoted " ], + [ "clone-max-zero", "Orphan processing with clone-max=0" ], + [ "clone-anon-dup", + "Bug LF#2087 - Correctly parse the state of anonymous clones that are active more than once per node" ], +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-1.dot b/cts/scheduler/dot/clone-recover-no-shuffle-1.dot +new file mode 100644 +index 00000000000..287d82d3806 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-1.dot +@@ -0,0 +1,10 @@ ++ digraph "g" { ++"dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node1" [ style = bold] ++"dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"dummy:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy:2_start_0 node1" -> "dummy:2_monitor_10000 node1" [ style = bold] ++"dummy:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-10.dot b/cts/scheduler/dot/clone-recover-no-shuffle-10.dot +new file mode 100644 +index 00000000000..1e1840966fa +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-10.dot +@@ -0,0 +1,10 @@ ++ digraph "g" { ++"dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node1" [ style = bold] ++"dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"dummy:2_monitor_11000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy:2_start_0 node1" -> "dummy:2_monitor_11000 node1" [ style = bold] ++"dummy:2_start_0 node1" [ 
style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-11.dot b/cts/scheduler/dot/clone-recover-no-shuffle-11.dot +new file mode 100644 +index 00000000000..2b08a594561 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-11.dot +@@ -0,0 +1,21 @@ ++ digraph "g" { ++"grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] ++"grp-clone_start_0" -> "grp:2_start_0" [ style = bold] ++"grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:2_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_start_0" -> "grp:2_running_0" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] ++"grp:2_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1:2_monitor_11000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_monitor_11000 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = bold] ++"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_11000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc2:2_start_0 node1" -> "rsc2:2_monitor_11000 node1" [ style = bold] ++"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-12.dot b/cts/scheduler/dot/clone-recover-no-shuffle-12.dot +new file mode 100644 +index 00000000000..ebc1dc6a815 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-12.dot +@@ -0,0 +1,35 @@ ++ digraph "g" { ++"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node1" [ style=bold 
color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_start_0 node1" -> "base-bundle-2_monitor_30000 node1" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_monitor_16000 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] ++"base-bundle-clone_start_0" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-podman-2_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node3" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-podman-2_monitor_60000 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle_running_0" 
[ style=bold color="green" fontcolor="orange"] ++"base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node1" [ style = bold] ++"base-bundle_start_0" [ style=bold color="green" fontcolor="orange"] ++"base:2_monitor_16000 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] ++"base:2_start_0 base-bundle-2" -> "base:2_monitor_16000 base-bundle-2" [ style = bold] ++"base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-2.dot b/cts/scheduler/dot/clone-recover-no-shuffle-2.dot +new file mode 100644 +index 00000000000..d3bdf04baa9 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-2.dot +@@ -0,0 +1,21 @@ ++ digraph "g" { ++"grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] ++"grp-clone_start_0" -> "grp:2_start_0" [ style = bold] ++"grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:2_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_start_0" -> "grp:2_running_0" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] ++"grp:2_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = bold] ++"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] 
++"rsc2:2_start_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] ++"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-3.dot b/cts/scheduler/dot/clone-recover-no-shuffle-3.dot +new file mode 100644 +index 00000000000..f60fd2cc04e +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-3.dot +@@ -0,0 +1,32 @@ ++ digraph "g" { ++"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_start_0 node1" -> "base-bundle-2_monitor_30000 node1" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] ++"base-bundle-clone_start_0" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-podman-2_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node3" [ style = bold] 
++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-podman-2_monitor_60000 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node1" [ style = bold] ++"base-bundle_start_0" [ style=bold color="green" fontcolor="orange"] ++"base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] ++"base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-4.dot b/cts/scheduler/dot/clone-recover-no-shuffle-4.dot +new file mode 100644 +index 00000000000..fd002f28fcf +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-4.dot +@@ -0,0 +1,23 @@ ++ digraph "g" { ++"dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node2" [ style = bold] ++"dummy-clone_start_0" -> "dummy_start_0 node1" [ style = bold] ++"dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_stop_0" -> "dummy-clone_stopped_0" [ style = bold] ++"dummy-clone_stop_0" -> "dummy_stop_0 node2" [ style = bold] ++"dummy-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_stopped_0" -> "dummy-clone_start_0" [ style = bold] ++"dummy-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"dummy:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node2" -> "dummy-clone_running_0" [ style 
= bold] ++"dummy:2_start_0 node2" -> "dummy:2_monitor_10000 node2" [ style = bold] ++"dummy:2_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"dummy_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy_start_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] ++"dummy_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"dummy_stop_0 node2" -> "dummy-clone_stopped_0" [ style = bold] ++"dummy_stop_0 node2" -> "dummy_start_0 node1" [ style = bold] ++"dummy_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-5.dot b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +new file mode 100644 +index 00000000000..7219ee5a6d3 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +@@ -0,0 +1,80 @@ ++ digraph "g" { ++"grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] ++"grp-clone_start_0" -> "grp:0_start_0" [ style = bold] ++"grp-clone_start_0" -> "grp:1_start_0" [ style = bold] ++"grp-clone_start_0" -> "grp:2_start_0" [ style = bold] ++"grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_stop_0" -> "grp-clone_stopped_0" [ style = bold] ++"grp-clone_stop_0" -> "grp:0_stop_0" [ style = bold] ++"grp-clone_stop_0" -> "grp:1_stop_0" [ style = bold] ++"grp-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_stopped_0" -> "grp-clone_start_0" [ style = bold] ++"grp-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"grp:0_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:0_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:0_start_0" -> "grp:0_running_0" [ style = bold] ++"grp:0_start_0" -> "rsc1_start_0 node1" [ style = bold] ++"grp:0_start_0" -> "rsc2_start_0 node1" [ style = bold] ++"grp:0_start_0" [ style=bold 
color="green" fontcolor="orange"] ++"grp:0_stop_0" -> "grp:0_stopped_0" [ style = bold] ++"grp:0_stop_0" -> "rsc1_stop_0 node2" [ style = bold] ++"grp:0_stop_0" -> "rsc2_stop_0 node2" [ style = bold] ++"grp:0_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grp:0_stopped_0" -> "grp-clone_stopped_0" [ style = bold] ++"grp:0_stopped_0" -> "grp:0_start_0" [ style = bold] ++"grp:0_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:1_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_start_0" -> "grp:1_running_0" [ style = bold] ++"grp:1_start_0" -> "rsc1_start_0 node1" [ style = bold] ++"grp:1_start_0" -> "rsc2_start_0 node1" [ style = bold] ++"grp:1_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_stop_0" -> "grp:1_stopped_0" [ style = bold] ++"grp:1_stop_0" -> "rsc1_stop_0 node3" [ style = bold] ++"grp:1_stop_0" -> "rsc2_stop_0 node3" [ style = bold] ++"grp:1_stop_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_stopped_0" -> "grp-clone_stopped_0" [ style = bold] ++"grp:1_stopped_0" -> "grp:1_start_0" [ style = bold] ++"grp:1_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:2_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_start_0" -> "grp:2_running_0" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] ++"grp:2_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = bold] ++"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_10000 node1" [ style=bold color="green" 
fontcolor="black"] ++"rsc1_start_0 node1" -> "grp:0_running_0" [ style = bold] ++"rsc1_start_0 node1" -> "grp:1_running_0" [ style = bold] ++"rsc1_start_0 node1" -> "rsc1_monitor_10000 node1" [ style = bold] ++"rsc1_start_0 node1" -> "rsc2_start_0 node1" [ style = bold] ++"rsc1_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] ++"rsc1_stop_0 node2" -> "rsc1_start_0 node1" [ style = bold] ++"rsc1_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc1_stop_0 node3" -> "grp:1_stopped_0" [ style = bold] ++"rsc1_stop_0 node3" -> "rsc1_start_0 node1" [ style = bold] ++"rsc1_stop_0 node3" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc2:2_start_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] ++"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2_start_0 node1" -> "grp:0_running_0" [ style = bold] ++"rsc2_start_0 node1" -> "grp:1_running_0" [ style = bold] ++"rsc2_start_0 node1" -> "rsc2_monitor_10000 node1" [ style = bold] ++"rsc2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] ++"rsc2_stop_0 node2" -> "rsc1_stop_0 node2" [ style = bold] ++"rsc2_stop_0 node2" -> "rsc2_start_0 node1" [ style = bold] ++"rsc2_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2_stop_0 node3" -> "grp:1_stopped_0" [ style = bold] ++"rsc2_stop_0 node3" -> "rsc1_stop_0 node3" [ style = bold] ++"rsc2_stop_0 node3" -> "rsc2_start_0 node1" [ style = bold] ++"rsc2_stop_0 node3" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-6.dot b/cts/scheduler/dot/clone-recover-no-shuffle-6.dot +new file mode 100644 +index 
00000000000..f8cfe9252d2 +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-6.dot +@@ -0,0 +1,97 @@ ++ digraph "g" { ++"base-bundle-0_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-0_start_0 node1" -> "base-bundle-0_monitor_30000 node1" [ style = bold] ++"base-bundle-0_start_0 node1" -> "base_start_0 base-bundle-0" [ style = bold] ++"base-bundle-0_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-0_stop_0 node3" -> "base-bundle-0_start_0 node1" [ style = bold] ++"base-bundle-0_stop_0 node3" -> "base-bundle-podman-0_stop_0 node3" [ style = bold] ++"base-bundle-0_stop_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-1_monitor_30000 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-1_start_0 node3" -> "base-bundle-1_monitor_30000 node3" [ style = bold] ++"base-bundle-1_start_0 node3" -> "base_start_0 base-bundle-1" [ style = bold] ++"base-bundle-1_start_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-1_stop_0 node2" -> "base-bundle-1_start_0 node3" [ style = bold] ++"base-bundle-1_stop_0 node2" -> "base-bundle-podman-1_stop_0 node2" [ style = bold] ++"base-bundle-1_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_30000 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_start_0 node2" -> "base-bundle-2_monitor_30000 node2" [ style = bold] ++"base-bundle-2_start_0 node2" -> "base:2_start_0 
base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] ++"base-bundle-clone_start_0" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_start_0" -> "base_start_0 base-bundle-0" [ style = bold] ++"base-bundle-clone_start_0" -> "base_start_0 base-bundle-1" [ style = bold] ++"base-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_stop_0" -> "base-bundle-clone_stopped_0" [ style = bold] ++"base-bundle-clone_stop_0" -> "base_stop_0 base-bundle-0" [ style = bold] ++"base-bundle-clone_stop_0" -> "base_stop_0 base-bundle-1" [ style = bold] ++"base-bundle-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_stopped_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle-clone_stopped_0" -> "base-bundle_stopped_0" [ style = bold] ++"base-bundle-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-podman-0_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-0_start_0 node1" -> "base-bundle-0_start_0 node1" [ style = bold] ++"base-bundle-podman-0_start_0 node1" -> "base-bundle-podman-0_monitor_60000 node1" [ style = bold] ++"base-bundle-podman-0_start_0 node1" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-0_start_0 node1" -> "base_start_0 base-bundle-0" [ style = bold] ++"base-bundle-podman-0_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-0_stop_0 node3" -> "base-bundle-podman-0_start_0 node1" [ style = bold] ++"base-bundle-podman-0_stop_0 node3" -> "base-bundle_stopped_0" [ style = bold] ++"base-bundle-podman-0_stop_0 node3" [ style=bold color="green" fontcolor="black"] 
++"base-bundle-podman-1_monitor_60000 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-1_start_0 node3" -> "base-bundle-1_start_0 node3" [ style = bold] ++"base-bundle-podman-1_start_0 node3" -> "base-bundle-podman-1_monitor_60000 node3" [ style = bold] ++"base-bundle-podman-1_start_0 node3" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-1_start_0 node3" -> "base_start_0 base-bundle-1" [ style = bold] ++"base-bundle-podman-1_start_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-1_stop_0 node2" -> "base-bundle-podman-1_start_0 node3" [ style = bold] ++"base-bundle-podman-1_stop_0 node2" -> "base-bundle_stopped_0" [ style = bold] ++"base-bundle-podman-1_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_monitor_60000 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node3" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle-podman-2_monitor_60000 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node2" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-0_start_0 node1" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-1_start_0 node3" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node2" [ style = bold] 
++"base-bundle_start_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_stop_0" -> "base-bundle-clone_stop_0" [ style = bold] ++"base-bundle_stop_0" -> "base-bundle-podman-0_stop_0 node3" [ style = bold] ++"base-bundle_stop_0" -> "base-bundle-podman-1_stop_0 node2" [ style = bold] ++"base-bundle_stop_0" -> "base_stop_0 base-bundle-0" [ style = bold] ++"base-bundle_stop_0" -> "base_stop_0 base-bundle-1" [ style = bold] ++"base-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] ++"base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base_start_0 base-bundle-0" -> "base-bundle-clone_running_0" [ style = bold] ++"base_start_0 base-bundle-0" -> "base_start_0 base-bundle-1" [ style = bold] ++"base_start_0 base-bundle-0" [ style=bold color="green" fontcolor="black"] ++"base_start_0 base-bundle-1" -> "base-bundle-clone_running_0" [ style = bold] ++"base_start_0 base-bundle-1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base_start_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] ++"base_stop_0 base-bundle-0" -> "base-bundle-0_stop_0 node3" [ style = bold] ++"base_stop_0 base-bundle-0" -> "base-bundle-clone_stopped_0" [ style = bold] ++"base_stop_0 base-bundle-0" -> "base_start_0 base-bundle-0" [ style = bold] ++"base_stop_0 base-bundle-0" [ style=bold color="green" fontcolor="black"] ++"base_stop_0 base-bundle-1" -> "base-bundle-1_stop_0 node2" [ style = bold] ++"base_stop_0 base-bundle-1" -> "base-bundle-clone_stopped_0" [ style = bold] ++"base_stop_0 base-bundle-1" -> "base_start_0 base-bundle-1" [ style = bold] ++"base_stop_0 base-bundle-1" -> "base_stop_0 base-bundle-0" [ style = bold] ++"base_stop_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-7.dot 
b/cts/scheduler/dot/clone-recover-no-shuffle-7.dot +new file mode 100644 +index 00000000000..8bff7da01db +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-7.dot +@@ -0,0 +1,45 @@ ++ digraph "g" { ++"Cancel dummy_monitor_10000 node2" -> "dummy_demote_0 node2" [ style = bold] ++"Cancel dummy_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"dummy-clone_demote_0" -> "dummy-clone_demoted_0" [ style = bold] ++"dummy-clone_demote_0" -> "dummy_demote_0 node2" [ style = bold] ++"dummy-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_demoted_0" -> "dummy-clone_promote_0" [ style = bold] ++"dummy-clone_demoted_0" -> "dummy-clone_start_0" [ style = bold] ++"dummy-clone_demoted_0" -> "dummy-clone_stop_0" [ style = bold] ++"dummy-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_promote_0" -> "dummy_promote_0 node1" [ style = bold] ++"dummy-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_running_0" -> "dummy-clone_promote_0" [ style = bold] ++"dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node3" [ style = bold] ++"dummy-clone_start_0" -> "dummy_start_0 node1" [ style = bold] ++"dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_stop_0" -> "dummy-clone_stopped_0" [ style = bold] ++"dummy-clone_stop_0" -> "dummy_stop_0 node3" [ style = bold] ++"dummy-clone_stop_0" [ style=bold color="green" fontcolor="orange"] ++"dummy-clone_stopped_0" -> "dummy-clone_promote_0" [ style = bold] ++"dummy-clone_stopped_0" -> "dummy-clone_start_0" [ style = bold] ++"dummy-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] ++"dummy:2_monitor_11000 node3" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node3" -> 
"dummy-clone_running_0" [ style = bold] ++"dummy:2_start_0 node3" -> "dummy:2_monitor_11000 node3" [ style = bold] ++"dummy:2_start_0 node3" [ style=bold color="green" fontcolor="black"] ++"dummy_demote_0 node2" -> "dummy-clone_demoted_0" [ style = bold] ++"dummy_demote_0 node2" -> "dummy_monitor_11000 node2" [ style = bold] ++"dummy_demote_0 node2" [ style=bold color="green" fontcolor="black"] ++"dummy_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy_monitor_11000 node2" [ style=bold color="green" fontcolor="black"] ++"dummy_promote_0 node1" -> "dummy-clone_promoted_0" [ style = bold] ++"dummy_promote_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] ++"dummy_promote_0 node1" [ style=bold color="green" fontcolor="black"] ++"dummy_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy_start_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] ++"dummy_start_0 node1" -> "dummy_promote_0 node1" [ style = bold] ++"dummy_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"dummy_stop_0 node3" -> "dummy-clone_stopped_0" [ style = bold] ++"dummy_stop_0 node3" -> "dummy_start_0 node1" [ style = bold] ++"dummy_stop_0 node3" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-8.dot b/cts/scheduler/dot/clone-recover-no-shuffle-8.dot +new file mode 100644 +index 00000000000..d9c311a67cb +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-8.dot +@@ -0,0 +1,63 @@ ++ digraph "g" { ++"Cancel rsc1_monitor_10000 node2" -> "rsc1_demote_0 node2" [ style = bold] ++"Cancel rsc1_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"Cancel rsc2_monitor_10000 node2" -> "rsc2_demote_0 node2" [ style = bold] ++"Cancel rsc2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"grp-clone_demote_0" -> "grp-clone_demoted_0" [ style = bold] ++"grp-clone_demote_0" -> "grp:1_demote_0" [ style = bold] ++"grp-clone_demote_0" [ style=bold 
color="green" fontcolor="orange"] ++"grp-clone_demoted_0" -> "grp-clone_promote_0" [ style = bold] ++"grp-clone_demoted_0" -> "grp-clone_start_0" [ style = bold] ++"grp-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_promote_0" -> "grp:2_promote_0" [ style = bold] ++"grp-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_running_0" -> "grp-clone_promote_0" [ style = bold] ++"grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] ++"grp-clone_start_0" -> "grp:2_start_0" [ style = bold] ++"grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_demote_0" -> "rsc1_demote_0 node2" [ style = bold] ++"grp:1_demote_0" -> "rsc2_demote_0 node2" [ style = bold] ++"grp:1_demote_0" [ style=bold color="green" fontcolor="orange"] ++"grp:1_demoted_0" -> "grp-clone_demoted_0" [ style = bold] ++"grp:1_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_promote_0" -> "rsc1:2_promote_0 node1" [ style = bold] ++"grp:2_promote_0" -> "rsc2:2_promote_0 node1" [ style = bold] ++"grp:2_promote_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_promoted_0" -> "grp-clone_promoted_0" [ style = bold] ++"grp:2_promoted_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_running_0" -> "grp-clone_running_0" [ style = bold] ++"grp:2_running_0" [ style=bold color="green" fontcolor="orange"] ++"grp:2_start_0" -> "grp:2_running_0" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] ++"grp:2_start_0" [ style=bold color="green" fontcolor="orange"] ++"rsc1:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_promote_0 node1" -> "grp:2_promoted_0" [ style = bold] ++"rsc1:2_promote_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] 
++"rsc1:2_promote_0 node1" -> "rsc2:2_promote_0 node1" [ style = bold] ++"rsc1:2_promote_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_promote_0 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = bold] ++"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1_demote_0 node2" -> "grp:1_demoted_0" [ style = bold] ++"rsc1_demote_0 node2" -> "rsc1_monitor_11000 node2" [ style = bold] ++"rsc1_demote_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc1_monitor_11000 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_promote_0 node1" -> "grp:2_promoted_0" [ style = bold] ++"rsc2:2_promote_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] ++"rsc2:2_promote_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc2:2_start_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] ++"rsc2:2_start_0 node1" -> "rsc2:2_promote_0 node1" [ style = bold] ++"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2_demote_0 node2" -> "grp:1_demoted_0" [ style = bold] ++"rsc2_demote_0 node2" -> "rsc1_demote_0 node2" [ style = bold] ++"rsc2_demote_0 node2" -> "rsc2_monitor_11000 node2" [ style = bold] ++"rsc2_demote_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2_monitor_11000 node2" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-9.dot b/cts/scheduler/dot/clone-recover-no-shuffle-9.dot +new file mode 100644 +index 00000000000..45dbac47e2b +--- /dev/null ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-9.dot +@@ -0,0 +1,69 @@ ++ digraph "g" { ++"Cancel base_monitor_15000 base-bundle-1" -> 
"base_demote_0 base-bundle-1" [ style = bold] ++"Cancel base_monitor_15000 base-bundle-1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-2_monitor_0 node3" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_start_0 node1" -> "base-bundle-2_monitor_30000 node1" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_promote_0 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-clone_demote_0" -> "base-bundle-clone_demoted_0" [ style = bold] ++"base-bundle-clone_demote_0" -> "base_demote_0 base-bundle-1" [ style = bold] ++"base-bundle-clone_demote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_demoted_0" -> "base-bundle-clone_promote_0" [ style = bold] ++"base-bundle-clone_demoted_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle-clone_demoted_0" -> "base-bundle_demoted_0" [ style = bold] ++"base-bundle-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_promote_0" -> "base:2_promote_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_promote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_promoted_0" -> "base-bundle_promoted_0" [ style = bold] ++"base-bundle-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] 
++"base-bundle-clone_running_0" -> "base-bundle-clone_promote_0" [ style = bold] ++"base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] ++"base-bundle-clone_start_0" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle-podman-2_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node3" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-podman-2_monitor_60000 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base:2_promote_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle_demote_0" -> "base-bundle-clone_demote_0" [ style = bold] ++"base-bundle_demote_0" -> "base-bundle_demoted_0" [ style = bold] ++"base-bundle_demote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_demoted_0" -> "base-bundle_promote_0" [ style = bold] ++"base-bundle_demoted_0" -> "base-bundle_start_0" [ style = bold] ++"base-bundle_demoted_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_promote_0" -> "base-bundle-clone_promote_0" [ style = bold] ++"base-bundle_promote_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_promoted_0" [ style=bold color="green" 
fontcolor="orange"] ++"base-bundle_running_0" -> "base-bundle_promote_0" [ style = bold] ++"base-bundle_running_0" [ style=bold color="green" fontcolor="orange"] ++"base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node1" [ style = bold] ++"base-bundle_start_0" [ style=bold color="green" fontcolor="orange"] ++"base:2_monitor_15000 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base:2_promote_0 base-bundle-2" -> "base-bundle-clone_promoted_0" [ style = bold] ++"base:2_promote_0 base-bundle-2" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base:2_promote_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] ++"base:2_start_0 base-bundle-2" -> "base:2_monitor_15000 base-bundle-2" [ style = bold] ++"base:2_start_0 base-bundle-2" -> "base:2_promote_0 base-bundle-2" [ style = bold] ++"base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] ++"base_demote_0 base-bundle-1" -> "base-bundle-clone_demoted_0" [ style = bold] ++"base_demote_0 base-bundle-1" -> "base_monitor_16000 base-bundle-1" [ style = bold] ++"base_demote_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] ++"base_monitor_16000 base-bundle-1" [ style=bold color="green" fontcolor="black"] ++} +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-1.exp b/cts/scheduler/exp/clone-recover-no-shuffle-1.exp +new file mode 100644 +index 00000000000..670a823dac9 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-1.exp +@@ -0,0 +1,51 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-10.exp b/cts/scheduler/exp/clone-recover-no-shuffle-10.exp +new file mode 100644 +index 00000000000..27b8b7037c3 +--- /dev/null ++++ 
b/cts/scheduler/exp/clone-recover-no-shuffle-10.exp +@@ -0,0 +1,51 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-11.exp b/cts/scheduler/exp/clone-recover-no-shuffle-11.exp +new file mode 100644 +index 00000000000..40cf1f69c11 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-11.exp +@@ -0,0 +1,110 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-12.exp b/cts/scheduler/exp/clone-recover-no-shuffle-12.exp +new file mode 100644 +index 00000000000..919e6b291c0 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-12.exp +@@ -0,0 +1,187 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-2.exp b/cts/scheduler/exp/clone-recover-no-shuffle-2.exp +new file mode 100644 +index 00000000000..84b1e1bc98c +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-2.exp +@@ -0,0 +1,110 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-3.exp b/cts/scheduler/exp/clone-recover-no-shuffle-3.exp +new file mode 100644 +index 00000000000..6b6ed075f57 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-3.exp +@@ -0,0 +1,171 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-4.exp b/cts/scheduler/exp/clone-recover-no-shuffle-4.exp +new file mode 100644 +index 00000000000..4596c685d0a +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-4.exp +@@ -0,0 +1,123 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-5.exp b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +new file mode 100644 +index 00000000000..8a8e799793e +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +@@ -0,0 +1,452 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-6.exp b/cts/scheduler/exp/clone-recover-no-shuffle-6.exp +new file mode 100644 +index 00000000000..e6704c9e254 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-6.exp +@@ -0,0 +1,507 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-7.exp b/cts/scheduler/exp/clone-recover-no-shuffle-7.exp +new file mode 100644 +index 00000000000..950de9e0312 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-7.exp +@@ -0,0 +1,240 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
+diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-8.exp b/cts/scheduler/exp/clone-recover-no-shuffle-8.exp +new file mode 100644 +index 00000000000..763a2f02fb0 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-8.exp +@@ -0,0 +1,338 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-9.exp b/cts/scheduler/exp/clone-recover-no-shuffle-9.exp +new file mode 100644 +index 00000000000..7bfe3c47281 +--- /dev/null ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-9.exp +@@ -0,0 +1,364 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-1.scores b/cts/scheduler/scores/clone-recover-no-shuffle-1.scores +new file mode 100644 +index 00000000000..c1d60b2f39a +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-1.scores +@@ -0,0 +1,25 @@ ++ ++pcmk__clone_assign: dummy-clone allocation score on node1: 0 ++pcmk__clone_assign: dummy-clone allocation score on node2: 0 ++pcmk__clone_assign: dummy-clone allocation score on node3: 0 ++pcmk__clone_assign: dummy:0 allocation score on node1: 0 ++pcmk__clone_assign: dummy:0 allocation score on node2: 1 ++pcmk__clone_assign: dummy:0 allocation score on node3: 0 ++pcmk__clone_assign: dummy:1 allocation score on node1: 0 ++pcmk__clone_assign: dummy:1 allocation score on node2: 0 ++pcmk__clone_assign: dummy:1 allocation score on node3: 1 ++pcmk__clone_assign: dummy:2 allocation score on node1: 0 ++pcmk__clone_assign: dummy:2 allocation score on node2: 0 ++pcmk__clone_assign: dummy:2 allocation score on node3: 0 ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node1: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node2: 1 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 0 
++pcmk__primitive_assign: dummy:1 allocation score on node1: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node3: 1 ++pcmk__primitive_assign: dummy:2 allocation score on node1: 0 ++pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-10.scores b/cts/scheduler/scores/clone-recover-no-shuffle-10.scores +new file mode 100644 +index 00000000000..4ac63e37058 +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-10.scores +@@ -0,0 +1,31 @@ ++ ++dummy:0 promotion score on node3: 5 ++dummy:1 promotion score on node2: 15 ++dummy:2 promotion score on node1: 10 ++pcmk__clone_assign: dummy-clone allocation score on node1: 0 ++pcmk__clone_assign: dummy-clone allocation score on node2: 0 ++pcmk__clone_assign: dummy-clone allocation score on node3: 0 ++pcmk__clone_assign: dummy:0 allocation score on node1: 10 ++pcmk__clone_assign: dummy:0 allocation score on node2: 0 ++pcmk__clone_assign: dummy:0 allocation score on node3: 6 ++pcmk__clone_assign: dummy:1 allocation score on node1: 10 ++pcmk__clone_assign: dummy:1 allocation score on node2: 16 ++pcmk__clone_assign: dummy:1 allocation score on node3: 0 ++pcmk__clone_assign: dummy:2 allocation score on node1: 10 ++pcmk__clone_assign: dummy:2 allocation score on node2: 15 ++pcmk__clone_assign: dummy:2 allocation score on node3: 5 ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:0 allocation score on node1: 10 ++pcmk__primitive_assign: dummy:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:0 allocation score on node2: -INFINITY 
++pcmk__primitive_assign: dummy:0 allocation score on node3: 6 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 6 ++pcmk__primitive_assign: dummy:1 allocation score on node1: 10 ++pcmk__primitive_assign: dummy:1 allocation score on node2: 16 ++pcmk__primitive_assign: dummy:1 allocation score on node3: 0 ++pcmk__primitive_assign: dummy:2 allocation score on node1: 10 ++pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node3: 5 +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-11.scores b/cts/scheduler/scores/clone-recover-no-shuffle-11.scores +new file mode 100644 +index 00000000000..1216dba711a +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-11.scores +@@ -0,0 +1,82 @@ ++ ++grp:0 promotion score on node3: 10 ++grp:1 promotion score on node2: 30 ++grp:2 promotion score on node1: 20 ++pcmk__clone_assign: grp-clone allocation score on node1: 0 ++pcmk__clone_assign: grp-clone allocation score on node2: 0 ++pcmk__clone_assign: grp-clone allocation score on node3: 0 ++pcmk__clone_assign: grp:0 allocation score on node1: 20 ++pcmk__clone_assign: grp:0 allocation score on node2: 0 ++pcmk__clone_assign: grp:0 allocation score on node3: 10 ++pcmk__clone_assign: grp:1 allocation score on node1: 20 ++pcmk__clone_assign: grp:1 allocation score on node2: 30 ++pcmk__clone_assign: grp:1 allocation score on node3: 0 ++pcmk__clone_assign: grp:2 allocation score on node1: 20 ++pcmk__clone_assign: grp:2 allocation score on node2: 30 ++pcmk__clone_assign: grp:2 allocation score on node3: 10 ++pcmk__clone_assign: rsc1:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node3: 1 ++pcmk__clone_assign: rsc1:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node2: 1 ++pcmk__clone_assign: rsc1:1 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:2 allocation 
score on node1: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node3: 1 ++pcmk__clone_assign: rsc2:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node2: 1 ++pcmk__clone_assign: rsc2:1 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node3: 0 ++pcmk__group_assign: grp:0 allocation score on node1: 20 ++pcmk__group_assign: grp:0 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:0 allocation score on node3: 10 ++pcmk__group_assign: grp:1 allocation score on node1: 20 ++pcmk__group_assign: grp:1 allocation score on node2: 30 ++pcmk__group_assign: grp:1 allocation score on node3: 0 ++pcmk__group_assign: grp:2 allocation score on node1: 20 ++pcmk__group_assign: grp:2 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc1:0 allocation score on node1: 0 ++pcmk__group_assign: rsc1:0 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:0 allocation score on node3: 1 ++pcmk__group_assign: rsc1:1 allocation score on node1: 0 ++pcmk__group_assign: rsc1:1 allocation score on node2: 1 ++pcmk__group_assign: rsc1:1 allocation score on node3: 0 ++pcmk__group_assign: rsc1:2 allocation score on node1: 0 ++pcmk__group_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc2:0 allocation score on node1: 0 ++pcmk__group_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:0 allocation score on node3: 1 ++pcmk__group_assign: rsc2:1 allocation score on node1: 0 
++pcmk__group_assign: rsc2:1 allocation score on node2: 1 ++pcmk__group_assign: rsc2:1 allocation score on node3: 0 ++pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:0 allocation score on node3: 2 ++pcmk__primitive_assign: rsc1:1 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: 2 ++pcmk__primitive_assign: rsc1:1 allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node3: 1 ++pcmk__primitive_assign: rsc2:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node2: 1 ++pcmk__primitive_assign: rsc2:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-12.scores b/cts/scheduler/scores/clone-recover-no-shuffle-12.scores +new file mode 100644 +index 00000000000..24cf3148c4c +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-12.scores +@@ -0,0 +1,67 @@ ++ ++base:0 promotion score on 
base-bundle-0: 5 ++base:1 promotion score on base-bundle-1: 15 ++base:2 promotion score on base-bundle-2: 10 ++pcmk__bundle_allocate: base-bundle allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-0: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-1: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-2: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node2: 0 
++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base:0 allocation score on base-bundle-0: 501 ++pcmk__bundle_allocate: base:1 allocation score on base-bundle-1: 501 ++pcmk__bundle_allocate: base:2 allocation score on base-bundle-2: 500 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-0: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-1: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-2: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on node1: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node2: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node3: -INFINITY ++pcmk__clone_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__clone_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__clone_assign: base:2 allocation score on base-bundle-2: INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node3: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-0 
allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__primitive_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__primitive_assign: base:2 allocation score on base-bundle-2: INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-2.scores b/cts/scheduler/scores/clone-recover-no-shuffle-2.scores +new file mode 100644 +index 00000000000..cfbd5bf5337 +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-2.scores +@@ -0,0 +1,79 @@ ++ ++pcmk__clone_assign: grp-clone allocation score on node1: 0 ++pcmk__clone_assign: grp-clone allocation score on node2: 0 ++pcmk__clone_assign: grp-clone allocation score on node3: 0 ++pcmk__clone_assign: grp:0 allocation score on node1: 0 ++pcmk__clone_assign: grp:0 allocation score on node2: 0 ++pcmk__clone_assign: grp:0 allocation score on node3: 0 ++pcmk__clone_assign: grp:1 allocation score on node1: 0 ++pcmk__clone_assign: grp:1 allocation score on node2: 0 ++pcmk__clone_assign: grp:1 allocation score on node3: 0 ++pcmk__clone_assign: grp:2 allocation score on node1: 0 ++pcmk__clone_assign: grp:2 allocation score on node2: 0 ++pcmk__clone_assign: grp:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node2: 1 ++pcmk__clone_assign: rsc1:0 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:1 allocation score on 
node2: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node3: 1 ++pcmk__clone_assign: rsc1:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node2: 1 ++pcmk__clone_assign: rsc2:0 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node3: 1 ++pcmk__clone_assign: rsc2:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node3: 0 ++pcmk__group_assign: grp:0 allocation score on node1: 0 ++pcmk__group_assign: grp:0 allocation score on node2: 0 ++pcmk__group_assign: grp:0 allocation score on node3: 0 ++pcmk__group_assign: grp:1 allocation score on node1: 0 ++pcmk__group_assign: grp:1 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:1 allocation score on node3: 0 ++pcmk__group_assign: grp:2 allocation score on node1: 0 ++pcmk__group_assign: grp:2 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc1:0 allocation score on node1: 0 ++pcmk__group_assign: rsc1:0 allocation score on node2: 1 ++pcmk__group_assign: rsc1:0 allocation score on node3: 0 ++pcmk__group_assign: rsc1:1 allocation score on node1: 0 ++pcmk__group_assign: rsc1:1 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:1 allocation score on node3: 1 ++pcmk__group_assign: rsc1:2 allocation score on node1: 0 ++pcmk__group_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc2:0 allocation score on node1: 0 ++pcmk__group_assign: rsc2:0 allocation score on node2: 1 ++pcmk__group_assign: rsc2:0 
allocation score on node3: 0 ++pcmk__group_assign: rsc2:1 allocation score on node1: 0 ++pcmk__group_assign: rsc2:1 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:1 allocation score on node3: 1 ++pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 ++pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 ++pcmk__primitive_assign: rsc1:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node2: 1 ++pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node3: 1 ++pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-3.scores b/cts/scheduler/scores/clone-recover-no-shuffle-3.scores +new file mode 100644 +index 00000000000..461c11633b1 +--- /dev/null ++++ 
b/cts/scheduler/scores/clone-recover-no-shuffle-3.scores +@@ -0,0 +1,64 @@ ++ ++pcmk__bundle_allocate: base-bundle allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-0: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-1: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-2: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node2: 0 ++pcmk__bundle_allocate: 
base-bundle-podman-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base:0 allocation score on base-bundle-0: 501 ++pcmk__bundle_allocate: base:1 allocation score on base-bundle-1: 501 ++pcmk__bundle_allocate: base:2 allocation score on base-bundle-2: 500 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-0: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-1: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-2: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on node1: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node2: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node3: -INFINITY ++pcmk__clone_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__clone_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__clone_assign: base:2 allocation score on base-bundle-2: INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node3: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node3: 0 
++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__primitive_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__primitive_assign: base:2 allocation score on base-bundle-2: INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-4.scores b/cts/scheduler/scores/clone-recover-no-shuffle-4.scores +new file mode 100644 +index 00000000000..492dad1baa4 +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-4.scores +@@ -0,0 +1,31 @@ ++ ++pcmk__clone_assign: dummy-clone allocation score on node1: 100 ++pcmk__clone_assign: dummy-clone allocation score on node2: 0 ++pcmk__clone_assign: dummy-clone allocation score on node3: 0 ++pcmk__clone_assign: dummy:0 allocation score on node1: 100 ++pcmk__clone_assign: dummy:0 allocation score on node2: 1 ++pcmk__clone_assign: dummy:0 allocation score on node3: 0 ++pcmk__clone_assign: dummy:1 allocation score on node1: 100 ++pcmk__clone_assign: dummy:1 allocation score on node2: 0 ++pcmk__clone_assign: dummy:1 allocation score on node3: 1 ++pcmk__clone_assign: dummy:2 allocation score on node1: 100 ++pcmk__clone_assign: dummy:2 allocation score on node2: 0 ++pcmk__clone_assign: dummy:2 allocation score on node3: 0 ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node1: 100 ++pcmk__primitive_assign: 
dummy:0 allocation score on node1: 100 ++pcmk__primitive_assign: dummy:0 allocation score on node2: 1 ++pcmk__primitive_assign: dummy:0 allocation score on node2: 1 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node1: 100 ++pcmk__primitive_assign: dummy:1 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node3: 1 ++pcmk__primitive_assign: dummy:1 allocation score on node3: 1 ++pcmk__primitive_assign: dummy:2 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-5.scores b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +new file mode 100644 +index 00000000000..eecba43fae0 +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +@@ -0,0 +1,79 @@ ++ ++pcmk__clone_assign: grp-clone allocation score on node1: 100 ++pcmk__clone_assign: grp-clone allocation score on node2: 0 ++pcmk__clone_assign: grp-clone allocation score on node3: 0 ++pcmk__clone_assign: grp:0 allocation score on node1: 100 ++pcmk__clone_assign: grp:0 allocation score on node2: 0 ++pcmk__clone_assign: grp:0 allocation score on node3: 0 ++pcmk__clone_assign: grp:1 allocation score on node1: 100 ++pcmk__clone_assign: grp:1 allocation score on node2: 0 ++pcmk__clone_assign: grp:1 allocation score on node3: 0 ++pcmk__clone_assign: grp:2 allocation score on node1: 100 ++pcmk__clone_assign: grp:2 allocation score on node2: 0 ++pcmk__clone_assign: grp:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node1: 100 ++pcmk__clone_assign: rsc1:0 allocation score on node2: 1 
++pcmk__clone_assign: rsc1:0 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node1: 100 ++pcmk__clone_assign: rsc1:1 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node3: 1 ++pcmk__clone_assign: rsc1:2 allocation score on node1: 100 ++pcmk__clone_assign: rsc1:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node2: 1 ++pcmk__clone_assign: rsc2:0 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node3: 1 ++pcmk__clone_assign: rsc2:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node3: 0 ++pcmk__group_assign: grp:0 allocation score on node1: 100 ++pcmk__group_assign: grp:0 allocation score on node2: 0 ++pcmk__group_assign: grp:0 allocation score on node3: 0 ++pcmk__group_assign: grp:1 allocation score on node1: 100 ++pcmk__group_assign: grp:1 allocation score on node2: 0 ++pcmk__group_assign: grp:1 allocation score on node3: 0 ++pcmk__group_assign: grp:2 allocation score on node1: 100 ++pcmk__group_assign: grp:2 allocation score on node2: 0 ++pcmk__group_assign: grp:2 allocation score on node3: 0 ++pcmk__group_assign: rsc1:0 allocation score on node1: 100 ++pcmk__group_assign: rsc1:0 allocation score on node2: 1 ++pcmk__group_assign: rsc1:0 allocation score on node3: 0 ++pcmk__group_assign: rsc1:1 allocation score on node1: 100 ++pcmk__group_assign: rsc1:1 allocation score on node2: 0 ++pcmk__group_assign: rsc1:1 allocation score on node3: 1 ++pcmk__group_assign: rsc1:2 allocation score on node1: 100 ++pcmk__group_assign: rsc1:2 allocation score on node2: 0 ++pcmk__group_assign: rsc1:2 allocation score on node3: 0 
++pcmk__group_assign: rsc2:0 allocation score on node1: 0 ++pcmk__group_assign: rsc2:0 allocation score on node2: 1 ++pcmk__group_assign: rsc2:0 allocation score on node3: 0 ++pcmk__group_assign: rsc2:1 allocation score on node1: 0 ++pcmk__group_assign: rsc2:1 allocation score on node2: 0 ++pcmk__group_assign: rsc2:1 allocation score on node3: 1 ++pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node2: 0 ++pcmk__group_assign: rsc2:2 allocation score on node3: 0 ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 ++pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 ++pcmk__primitive_assign: rsc1:2 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:2 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node3: 0 ++pcmk__primitive_assign: rsc2:0 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-6.scores 
b/cts/scheduler/scores/clone-recover-no-shuffle-6.scores +new file mode 100644 +index 00000000000..643e30f9d18 +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-6.scores +@@ -0,0 +1,70 @@ ++ ++pcmk__bundle_allocate: base-bundle allocation score on node1: 100 ++pcmk__bundle_allocate: base-bundle allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-0: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-1: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-2: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node1: 100 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node1: 100 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__bundle_allocate: 
base-bundle-podman-2 allocation score on node1: 100 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base:0 allocation score on base-bundle-0: 501 ++pcmk__bundle_allocate: base:1 allocation score on base-bundle-1: 501 ++pcmk__bundle_allocate: base:2 allocation score on base-bundle-2: 500 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-0: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-1: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-2: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on node1: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node2: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node3: -INFINITY ++pcmk__clone_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__clone_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__clone_assign: base:2 allocation score on base-bundle-2: INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node3: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 100 
++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 100 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: 100 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__primitive_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__primitive_assign: base:2 allocation score on base-bundle-2: INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-7.scores b/cts/scheduler/scores/clone-recover-no-shuffle-7.scores +new file mode 100644 +index 00000000000..fc45bf740fd +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-7.scores +@@ -0,0 +1,34 @@ ++ ++dummy:0 promotion score on node1: 15 ++dummy:1 promotion score on node2: 10 ++dummy:2 promotion score on node3: 5 ++pcmk__clone_assign: dummy-clone allocation score on node1: 0 ++pcmk__clone_assign: dummy-clone allocation score on node2: 0 ++pcmk__clone_assign: dummy-clone allocation score on node3: 0 ++pcmk__clone_assign: dummy:0 allocation score on node1: 15 ++pcmk__clone_assign: dummy:0 
allocation score on node2: 0 ++pcmk__clone_assign: dummy:0 allocation score on node3: 6 ++pcmk__clone_assign: dummy:1 allocation score on node1: 15 ++pcmk__clone_assign: dummy:1 allocation score on node2: 11 ++pcmk__clone_assign: dummy:1 allocation score on node3: 0 ++pcmk__clone_assign: dummy:2 allocation score on node1: 15 ++pcmk__clone_assign: dummy:2 allocation score on node2: 10 ++pcmk__clone_assign: dummy:2 allocation score on node3: 5 ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node1: 15 ++pcmk__primitive_assign: dummy:0 allocation score on node1: 15 ++pcmk__primitive_assign: dummy:0 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 6 ++pcmk__primitive_assign: dummy:0 allocation score on node3: 6 ++pcmk__primitive_assign: dummy:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node1: 15 ++pcmk__primitive_assign: dummy:1 allocation score on node2: 11 ++pcmk__primitive_assign: dummy:1 allocation score on node2: 11 ++pcmk__primitive_assign: dummy:1 allocation score on node3: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node3: 0 ++pcmk__primitive_assign: dummy:2 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node3: 5 +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-8.scores b/cts/scheduler/scores/clone-recover-no-shuffle-8.scores +new file mode 100644 +index 00000000000..56d4cc8395a +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-8.scores +@@ -0,0 +1,82 @@ ++ ++grp:0 promotion score on node3: 10 ++grp:1 promotion score on node2: 20 ++grp:2 promotion score on node1: 30 
++pcmk__clone_assign: grp-clone allocation score on node1: 0 ++pcmk__clone_assign: grp-clone allocation score on node2: 0 ++pcmk__clone_assign: grp-clone allocation score on node3: 0 ++pcmk__clone_assign: grp:0 allocation score on node1: 30 ++pcmk__clone_assign: grp:0 allocation score on node2: 0 ++pcmk__clone_assign: grp:0 allocation score on node3: 10 ++pcmk__clone_assign: grp:1 allocation score on node1: 30 ++pcmk__clone_assign: grp:1 allocation score on node2: 20 ++pcmk__clone_assign: grp:1 allocation score on node3: 0 ++pcmk__clone_assign: grp:2 allocation score on node1: 30 ++pcmk__clone_assign: grp:2 allocation score on node2: 20 ++pcmk__clone_assign: grp:2 allocation score on node3: 10 ++pcmk__clone_assign: rsc1:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:0 allocation score on node3: 1 ++pcmk__clone_assign: rsc1:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:1 allocation score on node2: 1 ++pcmk__clone_assign: rsc1:1 allocation score on node3: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc1:2 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:0 allocation score on node3: 1 ++pcmk__clone_assign: rsc2:1 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:1 allocation score on node2: 1 ++pcmk__clone_assign: rsc2:1 allocation score on node3: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node1: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node2: 0 ++pcmk__clone_assign: rsc2:2 allocation score on node3: 0 ++pcmk__group_assign: grp:0 allocation score on node1: 30 ++pcmk__group_assign: grp:0 allocation score on node2: 0 ++pcmk__group_assign: grp:0 allocation score on node3: 10 ++pcmk__group_assign: grp:1 allocation score on node1: 30 ++pcmk__group_assign: 
grp:1 allocation score on node2: 20 ++pcmk__group_assign: grp:1 allocation score on node3: -INFINITY ++pcmk__group_assign: grp:2 allocation score on node1: 30 ++pcmk__group_assign: grp:2 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc1:0 allocation score on node1: 0 ++pcmk__group_assign: rsc1:0 allocation score on node2: 0 ++pcmk__group_assign: rsc1:0 allocation score on node3: 1 ++pcmk__group_assign: rsc1:1 allocation score on node1: 0 ++pcmk__group_assign: rsc1:1 allocation score on node2: 1 ++pcmk__group_assign: rsc1:1 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc1:2 allocation score on node1: 0 ++pcmk__group_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc2:0 allocation score on node1: 0 ++pcmk__group_assign: rsc2:0 allocation score on node2: 0 ++pcmk__group_assign: rsc2:0 allocation score on node3: 1 ++pcmk__group_assign: rsc2:1 allocation score on node1: 0 ++pcmk__group_assign: rsc2:1 allocation score on node2: 1 ++pcmk__group_assign: rsc2:1 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node3: 2 ++pcmk__primitive_assign: rsc1:1 allocation score on node1: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: 2 ++pcmk__primitive_assign: rsc1:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc1:2 allocation score 
on node1: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node3: 1 ++pcmk__primitive_assign: rsc2:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node2: 1 ++pcmk__primitive_assign: rsc2:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-9.scores b/cts/scheduler/scores/clone-recover-no-shuffle-9.scores +new file mode 100644 +index 00000000000..947c86b262c +--- /dev/null ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-9.scores +@@ -0,0 +1,67 @@ ++ ++base:0 promotion score on base-bundle-0: 5 ++base:1 promotion score on base-bundle-1: 10 ++base:2 promotion score on base-bundle-2: 15 ++pcmk__bundle_allocate: base-bundle allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-2 allocation score on node3: 0 
++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-0: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-1: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on base-bundle-2: -INFINITY ++pcmk__bundle_allocate: base-bundle-clone allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-clone allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-1 allocation score on node3: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node2: 0 ++pcmk__bundle_allocate: base-bundle-podman-2 allocation score on node3: 0 ++pcmk__bundle_allocate: base:0 allocation score on base-bundle-0: 501 ++pcmk__bundle_allocate: base:1 allocation score on base-bundle-1: 501 ++pcmk__bundle_allocate: base:2 allocation score on base-bundle-2: 500 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-0: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-1: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on base-bundle-2: 0 ++pcmk__clone_assign: base-bundle-clone allocation score on node1: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node2: -INFINITY ++pcmk__clone_assign: base-bundle-clone allocation score on node3: -INFINITY ++pcmk__clone_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__clone_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__clone_assign: 
base:2 allocation score on base-bundle-2: INFINITY ++pcmk__primitive_assign: Fencing allocation score on node1: 0 ++pcmk__primitive_assign: Fencing allocation score on node2: 0 ++pcmk__primitive_assign: Fencing allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node3: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-1 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: 0 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base:0 allocation score on base-bundle-0: INFINITY ++pcmk__primitive_assign: base:1 allocation score on base-bundle-1: INFINITY ++pcmk__primitive_assign: base:2 allocation score on base-bundle-2: INFINITY +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-1.summary b/cts/scheduler/summary/clone-recover-no-shuffle-1.summary +new file mode 100644 +index 00000000000..0b6866ec16c 
+--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-1.summary +@@ -0,0 +1,29 @@ ++Using the original execution date of: 2023-06-21 00:59:59Z ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy]: ++ * Started: [ node2 node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Start dummy:2 ( node1 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: dummy-clone_start_0 ++ * Resource action: dummy start on node1 ++ * Pseudo action: dummy-clone_running_0 ++ * Resource action: dummy monitor=10000 on node1 ++Using the original execution date of: 2023-06-21 00:59:59Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy]: ++ * Started: [ node1 node2 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-10.summary b/cts/scheduler/summary/clone-recover-no-shuffle-10.summary +new file mode 100644 +index 00000000000..5b0f9b6d685 +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-10.summary +@@ -0,0 +1,29 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy] (promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Start dummy:2 ( node1 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: dummy-clone_start_0 ++ * Resource action: dummy start on node1 ++ * Pseudo action: dummy-clone_running_0 ++ * Resource action: dummy monitor=11000 on node1 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy] 
(promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node1 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-11.summary b/cts/scheduler/summary/clone-recover-no-shuffle-11.summary +new file mode 100644 +index 00000000000..e0bdb61d605 +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-11.summary +@@ -0,0 +1,34 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp] (promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Start rsc1:2 ( node1 ) ++ * Start rsc2:2 ( node1 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: grp-clone_start_0 ++ * Pseudo action: grp:2_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:2_running_0 ++ * Resource action: rsc1 monitor=11000 on node1 ++ * Resource action: rsc2 monitor=11000 on node1 ++ * Pseudo action: grp-clone_running_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp] (promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node1 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-12.summary b/cts/scheduler/summary/clone-recover-no-shuffle-12.summary +new file mode 100644 +index 00000000000..6e55a0b7f2f +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-12.summary +@@ -0,0 +1,43 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node3 ++ * base-bundle-1 
(ocf:pacemaker:Stateful): Promoted node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Stopped ++ ++Transition Summary: ++ * Start base-bundle-podman-2 ( node1 ) ++ * Start base-bundle-2 ( node1 ) ++ * Start base:2 ( base-bundle-2 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: base-bundle_start_0 ++ * Pseudo action: base-bundle-clone_start_0 ++ * Resource action: base-bundle-podman-2 start on node1 ++ * Resource action: base-bundle-2 monitor on node3 ++ * Resource action: base-bundle-2 monitor on node2 ++ * Resource action: base-bundle-2 monitor on node1 ++ * Resource action: base-bundle-podman-2 monitor=60000 on node1 ++ * Resource action: base-bundle-2 start on node1 ++ * Resource action: base start on base-bundle-2 ++ * Pseudo action: base-bundle-clone_running_0 ++ * Resource action: base-bundle-2 monitor=30000 on node1 ++ * Pseudo action: base-bundle_running_0 ++ * Resource action: base monitor=16000 on base-bundle-2 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 base-bundle-2 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Promoted node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Unpromoted node1 +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-2.summary b/cts/scheduler/summary/clone-recover-no-shuffle-2.summary +new file mode 100644 +index 00000000000..8b18120ad8d +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-2.summary +@@ -0,0 +1,32 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp]: ++ * Started: [ node2 node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Start rsc1:2 ( node1 ) ++ * 
Start rsc2:2 ( node1 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: grp-clone_start_0 ++ * Pseudo action: grp:2_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:2_running_0 ++ * Resource action: rsc1 monitor=10000 on node1 ++ * Resource action: rsc2 monitor=10000 on node1 ++ * Pseudo action: grp-clone_running_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp]: ++ * Started: [ node1 node2 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-3.summary b/cts/scheduler/summary/clone-recover-no-shuffle-3.summary +new file mode 100644 +index 00000000000..5702177e33d +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-3.summary +@@ -0,0 +1,42 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Started node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Started node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Stopped ++ ++Transition Summary: ++ * Start base-bundle-podman-2 ( node1 ) ++ * Start base-bundle-2 ( node1 ) ++ * Start base:2 ( base-bundle-2 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: base-bundle_start_0 ++ * Pseudo action: base-bundle-clone_start_0 ++ * Resource action: base-bundle-podman-2 start on node1 ++ * Resource action: base-bundle-2 monitor on node3 ++ * Resource action: base-bundle-2 monitor on node2 ++ * Resource action: base-bundle-2 monitor on node1 ++ * Resource action: base-bundle-podman-2 monitor=60000 on node1 ++ * Resource action: base-bundle-2 start on node1 ++ * Resource action: base start on base-bundle-2 ++ * Pseudo 
action: base-bundle-clone_running_0 ++ * Resource action: base-bundle-2 monitor=30000 on node1 ++ * Pseudo action: base-bundle_running_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 base-bundle-2 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Started node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Started node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Started node1 +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-4.summary b/cts/scheduler/summary/clone-recover-no-shuffle-4.summary +new file mode 100644 +index 00000000000..944bcb834b3 +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-4.summary +@@ -0,0 +1,35 @@ ++Using the original execution date of: 2023-06-21 00:59:59Z ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy]: ++ * Started: [ node2 node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Move dummy:0 ( node2 -> node1 ) ++ * Start dummy:2 ( node2 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: dummy-clone_stop_0 ++ * Resource action: dummy stop on node2 ++ * Pseudo action: dummy-clone_stopped_0 ++ * Pseudo action: dummy-clone_start_0 ++ * Resource action: dummy start on node1 ++ * Resource action: dummy start on node2 ++ * Pseudo action: dummy-clone_running_0 ++ * Resource action: dummy monitor=10000 on node1 ++ * Resource action: dummy monitor=10000 on node2 ++Using the original execution date of: 2023-06-21 00:59:59Z ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy]: ++ * Started: [ node1 node2 node3 
] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-5.summary b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +new file mode 100644 +index 00000000000..e84d0a574de +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +@@ -0,0 +1,59 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp]: ++ * Started: [ node2 node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Move rsc1:0 ( node2 -> node1 ) ++ * Move rsc2:0 ( node2 -> node1 ) ++ * Move rsc1:1 ( node3 -> node1 ) ++ * Move rsc2:1 ( node3 -> node1 ) ++ * Start rsc1:2 ( node1 ) ++ * Start rsc2:2 ( node1 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: grp-clone_stop_0 ++ * Pseudo action: grp:0_stop_0 ++ * Resource action: rsc2 stop on node2 ++ * Pseudo action: grp:1_stop_0 ++ * Resource action: rsc2 stop on node3 ++ * Resource action: rsc1 stop on node2 ++ * Resource action: rsc1 stop on node3 ++ * Pseudo action: grp:0_stopped_0 ++ * Pseudo action: grp:1_stopped_0 ++ * Pseudo action: grp-clone_stopped_0 ++ * Pseudo action: grp-clone_start_0 ++ * Pseudo action: grp:0_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:1_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:2_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:0_running_0 ++ * Resource action: rsc1 monitor=10000 on node1 ++ * Resource action: rsc2 monitor=10000 on node1 ++ * Pseudo action: grp:1_running_0 ++ * Resource action: rsc1 monitor=10000 on node1 ++ * Resource action: rsc2 monitor=10000 on node1 ++ * Pseudo action: grp:2_running_0 ++ * Resource action: rsc1 monitor=10000 on node1 ++ * Resource action: rsc2 monitor=10000 on node1 ++ * Pseudo action: 
grp-clone_running_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp]: ++ * Started: [ node1 ] ++ * Stopped: [ node2 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-6.summary b/cts/scheduler/summary/clone-recover-no-shuffle-6.summary +new file mode 100644 +index 00000000000..19a957e15fb +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-6.summary +@@ -0,0 +1,68 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Started node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Started node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Stopped ++ ++Transition Summary: ++ * Move base-bundle-podman-0 ( node3 -> node1 ) ++ * Move base-bundle-0 ( node3 -> node1 ) ++ * Restart base:0 ( base-bundle-0 ) due to required base-bundle-podman-0 start ++ * Move base-bundle-podman-1 ( node2 -> node3 ) ++ * Move base-bundle-1 ( node2 -> node3 ) ++ * Restart base:1 ( base-bundle-1 ) due to required base-bundle-podman-1 start ++ * Start base-bundle-podman-2 ( node2 ) ++ * Start base-bundle-2 ( node2 ) ++ * Start base:2 ( base-bundle-2 ) ++ ++Executing Cluster Transition: ++ * Pseudo action: base-bundle_stop_0 ++ * Pseudo action: base-bundle_start_0 ++ * Pseudo action: base-bundle-clone_stop_0 ++ * Resource action: base-bundle-podman-2 start on node2 ++ * Resource action: base-bundle-2 monitor on node3 ++ * Resource action: base-bundle-2 monitor on node2 ++ * Resource action: base-bundle-2 monitor on node1 ++ * Resource action: base stop on base-bundle-1 ++ * Resource action: base-bundle-1 stop on node2 ++ * Resource action: base-bundle-podman-2 
monitor=60000 on node2 ++ * Resource action: base-bundle-2 start on node2 ++ * Resource action: base stop on base-bundle-0 ++ * Pseudo action: base-bundle-clone_stopped_0 ++ * Pseudo action: base-bundle-clone_start_0 ++ * Resource action: base-bundle-0 stop on node3 ++ * Resource action: base-bundle-podman-1 stop on node2 ++ * Resource action: base-bundle-2 monitor=30000 on node2 ++ * Resource action: base-bundle-podman-0 stop on node3 ++ * Resource action: base-bundle-podman-1 start on node3 ++ * Resource action: base-bundle-1 start on node3 ++ * Pseudo action: base-bundle_stopped_0 ++ * Resource action: base-bundle-podman-0 start on node1 ++ * Resource action: base-bundle-0 start on node1 ++ * Resource action: base-bundle-podman-1 monitor=60000 on node3 ++ * Resource action: base-bundle-1 monitor=30000 on node3 ++ * Resource action: base start on base-bundle-0 ++ * Resource action: base start on base-bundle-1 ++ * Resource action: base start on base-bundle-2 ++ * Pseudo action: base-bundle-clone_running_0 ++ * Resource action: base-bundle-podman-0 monitor=60000 on node1 ++ * Resource action: base-bundle-0 monitor=30000 on node1 ++ * Pseudo action: base-bundle_running_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 base-bundle-2 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Started node1 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Started node3 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Started node2 +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-7.summary b/cts/scheduler/summary/clone-recover-no-shuffle-7.summary +new file mode 100644 +index 00000000000..e6c9baed0db +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-7.summary +@@ -0,0 +1,44 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 
node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy] (promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node3 ] ++ * Stopped: [ node1 ] ++ ++Transition Summary: ++ * Move dummy:0 ( Unpromoted node3 -> Promoted node1 ) ++ * Demote dummy:1 ( Promoted -> Unpromoted node2 ) ++ * Start dummy:2 ( node3 ) ++ ++Executing Cluster Transition: ++ * Resource action: dummy cancel=10000 on node2 ++ * Pseudo action: dummy-clone_demote_0 ++ * Resource action: dummy demote on node2 ++ * Pseudo action: dummy-clone_demoted_0 ++ * Pseudo action: dummy-clone_stop_0 ++ * Resource action: dummy stop on node3 ++ * Resource action: dummy monitor=11000 on node2 ++ * Pseudo action: dummy-clone_stopped_0 ++ * Pseudo action: dummy-clone_start_0 ++ * Resource action: dummy start on node1 ++ * Resource action: dummy start on node3 ++ * Pseudo action: dummy-clone_running_0 ++ * Resource action: dummy monitor=11000 on node3 ++ * Pseudo action: dummy-clone_promote_0 ++ * Resource action: dummy promote on node1 ++ * Pseudo action: dummy-clone_promoted_0 ++ * Resource action: dummy monitor=10000 on node1 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: dummy-clone [dummy] (promotable): ++ * Promoted: [ node1 ] ++ * Unpromoted: [ node2 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-8.summary b/cts/scheduler/summary/clone-recover-no-shuffle-8.summary +new file mode 100644 +index 00000000000..878f24801dd +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-8.summary +@@ -0,0 +1,52 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp] (promotable): ++ * Promoted: [ node2 ] ++ * Unpromoted: [ node3 ] ++ * Stopped: [ node1 ] 
++ ++Transition Summary: ++ * Demote rsc1:1 ( Promoted -> Unpromoted node2 ) ++ * Demote rsc2:1 ( Promoted -> Unpromoted node2 ) ++ * Promote rsc1:2 ( Stopped -> Promoted node1 ) ++ * Promote rsc2:2 ( Stopped -> Promoted node1 ) ++ ++Executing Cluster Transition: ++ * Resource action: rsc1 cancel=10000 on node2 ++ * Resource action: rsc2 cancel=10000 on node2 ++ * Pseudo action: grp-clone_demote_0 ++ * Pseudo action: grp:1_demote_0 ++ * Resource action: rsc2 demote on node2 ++ * Resource action: rsc1 demote on node2 ++ * Resource action: rsc2 monitor=11000 on node2 ++ * Pseudo action: grp:1_demoted_0 ++ * Resource action: rsc1 monitor=11000 on node2 ++ * Pseudo action: grp-clone_demoted_0 ++ * Pseudo action: grp-clone_start_0 ++ * Pseudo action: grp:2_start_0 ++ * Resource action: rsc1 start on node1 ++ * Resource action: rsc2 start on node1 ++ * Pseudo action: grp:2_running_0 ++ * Pseudo action: grp-clone_running_0 ++ * Pseudo action: grp-clone_promote_0 ++ * Pseudo action: grp:2_promote_0 ++ * Resource action: rsc1 promote on node1 ++ * Resource action: rsc2 promote on node1 ++ * Pseudo action: grp:2_promoted_0 ++ * Resource action: rsc1 monitor=10000 on node1 ++ * Resource action: rsc2 monitor=10000 on node1 ++ * Pseudo action: grp-clone_promoted_0 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started node2 ++ * Clone Set: grp-clone [grp] (promotable): ++ * Promoted: [ node1 ] ++ * Unpromoted: [ node2 node3 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-9.summary b/cts/scheduler/summary/clone-recover-no-shuffle-9.summary +new file mode 100644 +index 00000000000..7ede39a6e58 +--- /dev/null ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-9.summary +@@ -0,0 +1,56 @@ ++Current cluster status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 ] ++ ++ * Full List of Resources: ++ * Fencing 
(stonith:fence_xvm): Started node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Promoted node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Stopped ++ ++Transition Summary: ++ * Demote base:1 ( Promoted -> Unpromoted base-bundle-1 ) ++ * Start base-bundle-podman-2 ( node1 ) ++ * Start base-bundle-2 ( node1 ) ++ * Promote base:2 ( Stopped -> Promoted base-bundle-2 ) ++ ++Executing Cluster Transition: ++ * Resource action: base cancel=15000 on base-bundle-1 ++ * Pseudo action: base-bundle_demote_0 ++ * Pseudo action: base-bundle-clone_demote_0 ++ * Resource action: base demote on base-bundle-1 ++ * Pseudo action: base-bundle-clone_demoted_0 ++ * Pseudo action: base-bundle_demoted_0 ++ * Pseudo action: base-bundle_start_0 ++ * Resource action: base monitor=16000 on base-bundle-1 ++ * Pseudo action: base-bundle-clone_start_0 ++ * Resource action: base-bundle-podman-2 start on node1 ++ * Resource action: base-bundle-2 monitor on node3 ++ * Resource action: base-bundle-2 monitor on node2 ++ * Resource action: base-bundle-2 monitor on node1 ++ * Resource action: base-bundle-podman-2 monitor=60000 on node1 ++ * Resource action: base-bundle-2 start on node1 ++ * Resource action: base start on base-bundle-2 ++ * Pseudo action: base-bundle-clone_running_0 ++ * Resource action: base-bundle-2 monitor=30000 on node1 ++ * Pseudo action: base-bundle_running_0 ++ * Pseudo action: base-bundle_promote_0 ++ * Pseudo action: base-bundle-clone_promote_0 ++ * Resource action: base promote on base-bundle-2 ++ * Pseudo action: base-bundle-clone_promoted_0 ++ * Pseudo action: base-bundle_promoted_0 ++ * Resource action: base monitor=15000 on base-bundle-2 ++ ++Revised Cluster Status: ++ * Node List: ++ * Online: [ node1 node2 node3 ] ++ * GuestOnline: [ base-bundle-0 base-bundle-1 base-bundle-2 ] ++ ++ * Full List of Resources: ++ * Fencing (stonith:fence_xvm): Started 
node2 ++ * Container bundle set: base-bundle [localhost/pcmktest]: ++ * base-bundle-0 (ocf:pacemaker:Stateful): Unpromoted node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Unpromoted node2 ++ * base-bundle-2 (ocf:pacemaker:Stateful): Promoted node1 +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-1.xml b/cts/scheduler/xml/clone-recover-no-shuffle-1.xml +new file mode 100644 +index 00000000000..a634ff352cd +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-1.xml +@@ -0,0 +1,113 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-10.xml b/cts/scheduler/xml/clone-recover-no-shuffle-10.xml +new file mode 100644 +index 00000000000..faa202a0ae0 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-10.xml +@@ -0,0 +1,120 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-11.xml b/cts/scheduler/xml/clone-recover-no-shuffle-11.xml +new file mode 100644 +index 00000000000..43d6d749525 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-11.xml +@@ -0,0 +1,153 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-12.xml b/cts/scheduler/xml/clone-recover-no-shuffle-12.xml +new file mode 100644 +index 00000000000..e3026903533 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-12.xml +@@ -0,0 +1,186 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-2.xml b/cts/scheduler/xml/clone-recover-no-shuffle-2.xml +new file mode 100644 +index 00000000000..486666c1f26 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-2.xml +@@ -0,0 +1,141 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-3.xml b/cts/scheduler/xml/clone-recover-no-shuffle-3.xml +new file mode 100644 +index 00000000000..ddafb741dce +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-3.xml +@@ -0,0 +1,180 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-4.xml b/cts/scheduler/xml/clone-recover-no-shuffle-4.xml +new file mode 100644 +index 00000000000..40e6520c6d0 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-4.xml +@@ -0,0 +1,120 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-5.xml b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +new file mode 100644 +index 00000000000..67176dc1a03 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +@@ -0,0 +1,148 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-6.xml b/cts/scheduler/xml/clone-recover-no-shuffle-6.xml +new file mode 100644 +index 00000000000..3de42f581d4 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-6.xml +@@ -0,0 +1,187 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-7.xml b/cts/scheduler/xml/clone-recover-no-shuffle-7.xml +new file mode 100644 +index 00000000000..6e9dad50db4 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-7.xml +@@ -0,0 +1,125 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-8.xml b/cts/scheduler/xml/clone-recover-no-shuffle-8.xml +new file mode 100644 +index 00000000000..6f882b80785 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-8.xml +@@ -0,0 +1,153 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-9.xml b/cts/scheduler/xml/clone-recover-no-shuffle-9.xml +new file mode 100644 +index 00000000000..104331d6c00 +--- /dev/null ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-9.xml +@@ -0,0 +1,186 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + +From 44dfe36a316bddc562c07f7e1adbbaa57b9adf77 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 28 Jun 2023 02:04:45 -0700 +Subject: [PATCH 08/19] Refactor: libpacemaker: Recursively copy and restore + allowed node tables + +Given a resource, these two new functions create copies of the allowed +nodes tables of its entire tree of descendants, or restore from such a +backup copy. + +Ref T678 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/libpacemaker_private.h | 6 +++ + lib/pacemaker/pcmk_sched_nodes.c | 76 ++++++++++++++++++++++++++++ + 2 files changed, 82 insertions(+) + +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index 614d695f83f..8cdd13f7304 100644 +--- a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -874,6 +874,12 @@ bool pcmk__any_node_available(GHashTable *nodes); + G_GNUC_INTERNAL + GHashTable *pcmk__copy_node_table(GHashTable *nodes); + ++G_GNUC_INTERNAL ++void pcmk__copy_node_tables(const pe_resource_t *rsc, GHashTable **copy); ++ ++G_GNUC_INTERNAL ++void pcmk__restore_node_tables(pe_resource_t *rsc, GHashTable *backup); ++ + G_GNUC_INTERNAL + GList *pcmk__sort_nodes(GList *nodes, pe_node_t *active_node); + +diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c +index d7d5ba46169..eb0b2a41e39 100644 +--- a/lib/pacemaker/pcmk_sched_nodes.c ++++ b/lib/pacemaker/pcmk_sched_nodes.c +@@ -82,6 +82,82 @@ pcmk__copy_node_table(GHashTable *nodes) + return new_table; + } + ++/*! 
++ * \internal ++ * \brief Free a table of node tables ++ * ++ * \param[in,out] data Table to free ++ * ++ * \note This is a \c GDestroyNotify wrapper for \c g_hash_table_destroy(). ++ */ ++static void ++destroy_node_tables(gpointer data) ++{ ++ g_hash_table_destroy((GHashTable *) data); ++} ++ ++/*! ++ * \internal ++ * \brief Recursively copy the node tables of a resource ++ * ++ * Build a hash table containing copies of the allowed nodes tables of \p rsc ++ * and its entire tree of descendants. The key is the resource ID, and the value ++ * is a copy of the resource's node table. ++ * ++ * \param[in] rsc Resource whose node table to copy ++ * \param[in,out] copy Where to store the copied node tables ++ * ++ * \note \p *copy should be \c NULL for the top-level call. ++ * \note The caller is responsible for freeing \p copy using ++ * \c g_hash_table_destroy(). ++ */ ++void ++pcmk__copy_node_tables(const pe_resource_t *rsc, GHashTable **copy) ++{ ++ CRM_ASSERT((rsc != NULL) && (copy != NULL)); ++ ++ if (*copy == NULL) { ++ *copy = pcmk__strkey_table(NULL, destroy_node_tables); ++ } ++ ++ g_hash_table_insert(*copy, rsc->id, ++ pcmk__copy_node_table(rsc->allowed_nodes)); ++ ++ for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ pcmk__copy_node_tables((const pe_resource_t *) iter->data, copy); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Recursively restore the node tables of a resource from backup ++ * ++ * Given a hash table containing backup copies of the allowed nodes tables of ++ * \p rsc and its entire tree of descendants, replace the resources' current ++ * node tables with the backed-up copies. ++ * ++ * \param[in,out] rsc Resource whose node tables to restore ++ * \param[in] backup Table of backup node tables (created by ++ * \c pcmk__copy_node_tables()) ++ * ++ * \note This function frees the resources' current node tables. 
++ */ ++void ++pcmk__restore_node_tables(pe_resource_t *rsc, GHashTable *backup) ++{ ++ CRM_ASSERT((rsc != NULL) && (backup != NULL)); ++ ++ g_hash_table_destroy(rsc->allowed_nodes); ++ ++ // Copy to avoid danger with multiple restores ++ rsc->allowed_nodes = g_hash_table_lookup(backup, rsc->id); ++ rsc->allowed_nodes = pcmk__copy_node_table(rsc->allowed_nodes); ++ ++ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ pcmk__restore_node_tables((pe_resource_t *) iter->data, backup); ++ } ++} ++ + /*! + * \internal + * \brief Copy a list of node objects + +From a3c120c4c0aeb48efd55bac6de68423be099831d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 28 Jun 2023 02:09:28 -0700 +Subject: [PATCH 09/19] Refactor: libpacemaker: Restore node tables if cloned + group assign fails + +Currently, when assigning an instance of a cloned group (that is, one of +the groups), we make a copy only of the group's allowed nodes table. We +restore only that table if an early assignment attempt fails. + +Here, we make a recursive copy containing the allowed nodes tables of +the group itself and of all the resources in the group. Then we restore +all of them from backup if the assignment fails. + +This doesn't visibly fix anything yet, but it's a necessary part of the +fix for T678. 
And it was obviously wrong before :) + +Ref T678 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_instances.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index c880196f70f..783820bbf69 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -600,8 +600,9 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + chosen = instance->cmds->assign(instance, NULL); + + } else { // Possible early assignment to preferred node +- GHashTable *backup = pcmk__copy_node_table(instance->allowed_nodes); ++ GHashTable *backup = NULL; + ++ pcmk__copy_node_tables(instance, &backup); + chosen = instance->cmds->assign(instance, prefer); + + // Revert nodes if preferred node won't be assigned +@@ -609,13 +610,11 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + crm_info("Not assigning %s to preferred node %s: %s is better", + instance->id, pe__node_name(prefer), + pe__node_name(chosen)); +- g_hash_table_destroy(instance->allowed_nodes); +- instance->allowed_nodes = backup; ++ pcmk__restore_node_tables(instance, backup); + pcmk__unassign_resource(instance); + chosen = NULL; +- } else if (backup != NULL) { +- g_hash_table_destroy(backup); + } ++ g_hash_table_destroy(backup); + } + + // The parent tracks how many instances have been assigned to each node + +From a5a5c76333365be87f5d3d62f354b45376894506 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 28 Jun 2023 02:08:44 -0700 +Subject: [PATCH 10/19] Fix: libpacemaker: Respect clone-node-max for cloned + groups + +Currently, cloned groups may have more than clone-node-max instances +assigned to a given node. This can happen when a location constraint +exists for the clone. + +For example, consider the case of the clone-recover-no-shuffle-5 test. +The cloned group prefers node1 with a score of 100. 
The location score +is applied only to a group's first member. + +So in the early assignment attempt (within pcmk__assign_instances()), we +try to assign each instance (group) to its current node. However, the +first member prefers a different node (node1) and gets assigned there +instead. The second member has to follow the first due to the group's +internal colocation. + +However, node1 wasn't the preferred node. So assign_instance() tries to +revert the assignment by calling pcmk__unassign_resource() on the +instance (the group). But this leaves the group members assigned, +because pcmk__unassign_resource() doesn't act recursively. + +With this commit, pcmk__unassign_resource() acts recursively. We can now +unassign a resource and all its children recursively. + +Fixes T678 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_resource.c | 43 +++++++++++++++++++---------- + 1 file changed, 28 insertions(+), 15 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c +index dd9939a42a6..8f703789b20 100644 +--- a/lib/pacemaker/pcmk_sched_resource.c ++++ b/lib/pacemaker/pcmk_sched_resource.c +@@ -455,13 +455,14 @@ pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) + + /*! + * \internal +- * \brief Remove any assignment of a specified resource to a node ++ * \brief Remove any node assignment from a specified resource and its children + * + * If a specified resource has been assigned to a node, remove that assignment +- * and mark the resource as provisional again. This is not done recursively for +- * children, so it should be called only for primitives. ++ * and mark the resource as provisional again. + * + * \param[in,out] rsc Resource to unassign ++ * ++ * \note This function is called recursively on \p rsc and its children. 
+ */ + void + pcmk__unassign_resource(pe_resource_t *rsc) +@@ -469,21 +470,33 @@ pcmk__unassign_resource(pe_resource_t *rsc) + pe_node_t *old = rsc->allocated_to; + + if (old == NULL) { +- return; ++ crm_info("Unassigning %s", rsc->id); ++ } else { ++ crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old)); + } + +- crm_info("Unassigning %s from %s", rsc->id, pe__node_name(old)); + pe__set_resource_flags(rsc, pe_rsc_provisional); +- rsc->allocated_to = NULL; +- +- /* We're going to free the pe_node_t, but its details member is shared and +- * will remain, so update that appropriately first. +- */ +- old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, +- rsc); +- old->details->num_resources--; +- pcmk__release_node_capacity(old->details->utilization, rsc); +- free(old); ++ ++ if (rsc->children == NULL) { ++ if (old == NULL) { ++ return; ++ } ++ rsc->allocated_to = NULL; ++ ++ /* We're going to free the pe_node_t, but its details member is shared ++ * and will remain, so update that appropriately first. ++ */ ++ old->details->allocated_rsc = g_list_remove(old->details->allocated_rsc, ++ rsc); ++ old->details->num_resources--; ++ pcmk__release_node_capacity(old->details->utilization, rsc); ++ free(old); ++ return; ++ } ++ ++ for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { ++ pcmk__unassign_resource((pe_resource_t *) iter->data); ++ } + } + + /*! 
+ +From edd9b4ef2094e776530ff540047848aa6d2a1b42 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 28 Jun 2023 02:39:39 -0700 +Subject: [PATCH 11/19] Test: scheduler: Update tests for cloned group + clone-node-max fix + +Ref T678 + +Signed-off-by: Reid Wahl +--- + .../dot/clone-recover-no-shuffle-5.dot | 46 +--- + .../exp/clone-recover-no-shuffle-5.exp | 231 +++--------------- + .../scores/clone-recover-no-shuffle-5.scores | 50 +++- + .../clone-recover-no-shuffle-5.summary | 27 +- + .../xml/clone-recover-no-shuffle-5.xml | 6 +- + 5 files changed, 97 insertions(+), 263 deletions(-) + +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-5.dot b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +index 7219ee5a6d3..a2356f2280b 100644 +--- a/cts/scheduler/dot/clone-recover-no-shuffle-5.dot ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +@@ -2,12 +2,10 @@ + "grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] + "grp-clone_start_0" -> "grp:0_start_0" [ style = bold] +-"grp-clone_start_0" -> "grp:1_start_0" [ style = bold] + "grp-clone_start_0" -> "grp:2_start_0" [ style = bold] + "grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] + "grp-clone_stop_0" -> "grp-clone_stopped_0" [ style = bold] + "grp-clone_stop_0" -> "grp:0_stop_0" [ style = bold] +-"grp-clone_stop_0" -> "grp:1_stop_0" [ style = bold] + "grp-clone_stop_0" [ style=bold color="green" fontcolor="orange"] + "grp-clone_stopped_0" -> "grp-clone_start_0" [ style = bold] + "grp-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +@@ -24,57 +22,35 @@ + "grp:0_stopped_0" -> "grp-clone_stopped_0" [ style = bold] + "grp:0_stopped_0" -> "grp:0_start_0" [ style = bold] + "grp:0_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"grp:1_running_0" -> "grp-clone_running_0" [ style = bold] +-"grp:1_running_0" [ style=bold color="green" fontcolor="orange"] +-"grp:1_start_0" -> "grp:1_running_0" [ 
style = bold] +-"grp:1_start_0" -> "rsc1_start_0 node1" [ style = bold] +-"grp:1_start_0" -> "rsc2_start_0 node1" [ style = bold] +-"grp:1_start_0" [ style=bold color="green" fontcolor="orange"] +-"grp:1_stop_0" -> "grp:1_stopped_0" [ style = bold] +-"grp:1_stop_0" -> "rsc1_stop_0 node3" [ style = bold] +-"grp:1_stop_0" -> "rsc2_stop_0 node3" [ style = bold] +-"grp:1_stop_0" [ style=bold color="green" fontcolor="orange"] +-"grp:1_stopped_0" -> "grp-clone_stopped_0" [ style = bold] +-"grp:1_stopped_0" -> "grp:1_start_0" [ style = bold] +-"grp:1_stopped_0" [ style=bold color="green" fontcolor="orange"] + "grp:2_running_0" -> "grp-clone_running_0" [ style = bold] + "grp:2_running_0" [ style=bold color="green" fontcolor="orange"] + "grp:2_start_0" -> "grp:2_running_0" [ style = bold] +-"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] +-"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node2" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node2" [ style = bold] + "grp:2_start_0" [ style=bold color="green" fontcolor="orange"] +-"rsc1:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] +-"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] +-"rsc1:2_start_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] +-"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = bold] +-"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node2" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node2" -> "rsc1:2_monitor_10000 node2" [ style = bold] ++"rsc1:2_start_0 node2" -> "rsc2:2_start_0 node2" [ style = bold] ++"rsc1:2_start_0 node2" [ style=bold color="green" fontcolor="black"] + "rsc1_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] + "rsc1_start_0 node1" -> "grp:0_running_0" [ style = bold] +-"rsc1_start_0 node1" -> "grp:1_running_0" [ style = bold] + 
"rsc1_start_0 node1" -> "rsc1_monitor_10000 node1" [ style = bold] + "rsc1_start_0 node1" -> "rsc2_start_0 node1" [ style = bold] + "rsc1_start_0 node1" [ style=bold color="green" fontcolor="black"] + "rsc1_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] + "rsc1_stop_0 node2" -> "rsc1_start_0 node1" [ style = bold] + "rsc1_stop_0 node2" [ style=bold color="green" fontcolor="black"] +-"rsc1_stop_0 node3" -> "grp:1_stopped_0" [ style = bold] +-"rsc1_stop_0 node3" -> "rsc1_start_0 node1" [ style = bold] +-"rsc1_stop_0 node3" [ style=bold color="green" fontcolor="black"] +-"rsc2:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] +-"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] +-"rsc2:2_start_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] +-"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node2" -> "grp:2_running_0" [ style = bold] ++"rsc2:2_start_0 node2" -> "rsc2:2_monitor_10000 node2" [ style = bold] ++"rsc2:2_start_0 node2" [ style=bold color="green" fontcolor="black"] + "rsc2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] + "rsc2_start_0 node1" -> "grp:0_running_0" [ style = bold] +-"rsc2_start_0 node1" -> "grp:1_running_0" [ style = bold] + "rsc2_start_0 node1" -> "rsc2_monitor_10000 node1" [ style = bold] + "rsc2_start_0 node1" [ style=bold color="green" fontcolor="black"] + "rsc2_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] + "rsc2_stop_0 node2" -> "rsc1_stop_0 node2" [ style = bold] + "rsc2_stop_0 node2" -> "rsc2_start_0 node1" [ style = bold] + "rsc2_stop_0 node2" [ style=bold color="green" fontcolor="black"] +-"rsc2_stop_0 node3" -> "grp:1_stopped_0" [ style = bold] +-"rsc2_stop_0 node3" -> "rsc1_stop_0 node3" [ style = bold] +-"rsc2_stop_0 node3" -> "rsc2_start_0 node1" [ style = bold] +-"rsc2_stop_0 node3" [ style=bold color="green" fontcolor="black"] + } +diff --git 
a/cts/scheduler/exp/clone-recover-no-shuffle-5.exp b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +index 8a8e799793e..c1cee43b12f 100644 +--- a/cts/scheduler/exp/clone-recover-no-shuffle-5.exp ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +@@ -25,7 +25,7 @@ + + + +- ++ + + + +@@ -58,7 +58,7 @@ + + + +- ++ + + + +@@ -154,245 +154,92 @@ + + + +- ++ + + + + + +- ++ + + +- ++ + + +- ++ + + + + + +- ++ + + + + + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + +@@ -401,24 +248,21 @@ + + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + +- ++ + + + +@@ -427,25 +271,22 @@ + + + +- ++ + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + + +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-5.scores b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +index eecba43fae0..0dd9728830c 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-5.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +@@ -30,50 +30,80 @@ pcmk__clone_assign: rsc2:2 allocation score on node1: 0 + pcmk__clone_assign: rsc2:2 allocation score on node2: 0 + pcmk__clone_assign: rsc2:2 allocation score on node3: 0 + pcmk__group_assign: grp:0 allocation score on node1: 100 ++pcmk__group_assign: grp:0 allocation score on node1: 100 ++pcmk__group_assign: grp:0 allocation score on node2: 0 + pcmk__group_assign: grp:0 allocation score on node2: 0 + 
pcmk__group_assign: grp:0 allocation score on node3: 0 ++pcmk__group_assign: grp:0 allocation score on node3: 0 ++pcmk__group_assign: grp:1 allocation score on node1: -INFINITY + pcmk__group_assign: grp:1 allocation score on node1: 100 + pcmk__group_assign: grp:1 allocation score on node2: 0 ++pcmk__group_assign: grp:1 allocation score on node2: 0 ++pcmk__group_assign: grp:1 allocation score on node3: 0 + pcmk__group_assign: grp:1 allocation score on node3: 0 +-pcmk__group_assign: grp:2 allocation score on node1: 100 ++pcmk__group_assign: grp:2 allocation score on node1: -INFINITY + pcmk__group_assign: grp:2 allocation score on node2: 0 +-pcmk__group_assign: grp:2 allocation score on node3: 0 ++pcmk__group_assign: grp:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc1:0 allocation score on node1: 100 + pcmk__group_assign: rsc1:0 allocation score on node1: 100 + pcmk__group_assign: rsc1:0 allocation score on node2: 1 ++pcmk__group_assign: rsc1:0 allocation score on node2: 1 ++pcmk__group_assign: rsc1:0 allocation score on node3: 0 + pcmk__group_assign: rsc1:0 allocation score on node3: 0 ++pcmk__group_assign: rsc1:1 allocation score on node1: -INFINITY + pcmk__group_assign: rsc1:1 allocation score on node1: 100 + pcmk__group_assign: rsc1:1 allocation score on node2: 0 ++pcmk__group_assign: rsc1:1 allocation score on node2: 0 + pcmk__group_assign: rsc1:1 allocation score on node3: 1 +-pcmk__group_assign: rsc1:2 allocation score on node1: 100 ++pcmk__group_assign: rsc1:1 allocation score on node3: 1 ++pcmk__group_assign: rsc1:2 allocation score on node1: -INFINITY + pcmk__group_assign: rsc1:2 allocation score on node2: 0 +-pcmk__group_assign: rsc1:2 allocation score on node3: 0 ++pcmk__group_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__group_assign: rsc2:0 allocation score on node1: 0 + pcmk__group_assign: rsc2:0 allocation score on node1: 0 + pcmk__group_assign: rsc2:0 allocation score on node2: 1 ++pcmk__group_assign: rsc2:0 allocation 
score on node2: 1 + pcmk__group_assign: rsc2:0 allocation score on node3: 0 ++pcmk__group_assign: rsc2:0 allocation score on node3: 0 ++pcmk__group_assign: rsc2:1 allocation score on node1: -INFINITY + pcmk__group_assign: rsc2:1 allocation score on node1: 0 + pcmk__group_assign: rsc2:1 allocation score on node2: 0 ++pcmk__group_assign: rsc2:1 allocation score on node2: 0 ++pcmk__group_assign: rsc2:1 allocation score on node3: 1 + pcmk__group_assign: rsc2:1 allocation score on node3: 1 +-pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node1: -INFINITY + pcmk__group_assign: rsc2:2 allocation score on node2: 0 +-pcmk__group_assign: rsc2:2 allocation score on node3: 0 ++pcmk__group_assign: rsc2:2 allocation score on node3: -INFINITY + pcmk__primitive_assign: Fencing allocation score on node1: 0 + pcmk__primitive_assign: Fencing allocation score on node2: 0 + pcmk__primitive_assign: Fencing allocation score on node3: 0 + pcmk__primitive_assign: rsc1:0 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 + pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 + pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc1:1 allocation score on node1: 100 + pcmk__primitive_assign: rsc1:1 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 + pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 +-pcmk__primitive_assign: rsc1:2 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:2 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc1:2 allocation score on node2: 0 +-pcmk__primitive_assign: rsc1:2 allocation score on node3: 
0 ++pcmk__primitive_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node1: 0 + pcmk__primitive_assign: rsc2:0 allocation score on node1: 0 + pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc2:1 allocation score on node1: 0 + pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:1 allocation score on node3: -INFINITY +-pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 +-pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:1 allocation score on node3: 1 ++pcmk__primitive_assign: rsc2:2 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc2:2 allocation score on node2: 0 + pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-5.summary b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +index e84d0a574de..121214c42ab 100644 +--- a/cts/scheduler/summary/clone-recover-no-shuffle-5.summary ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +@@ -11,41 +11,29 @@ Current cluster status: + Transition Summary: + * Move rsc1:0 ( node2 -> node1 ) + * Move rsc2:0 ( node2 -> node1 ) +- * Move rsc1:1 ( node3 -> node1 ) +- * Move rsc2:1 ( node3 -> node1 ) +- * Start rsc1:2 ( node1 ) +- * Start rsc2:2 ( node1 ) ++ * Start rsc1:2 ( node2 ) ++ * Start rsc2:2 ( node2 ) + + Executing Cluster Transition: + * Pseudo action: grp-clone_stop_0 + * Pseudo action: grp:0_stop_0 + * Resource action: rsc2 stop on node2 +- * Pseudo action: 
grp:1_stop_0 +- * Resource action: rsc2 stop on node3 + * Resource action: rsc1 stop on node2 +- * Resource action: rsc1 stop on node3 + * Pseudo action: grp:0_stopped_0 +- * Pseudo action: grp:1_stopped_0 + * Pseudo action: grp-clone_stopped_0 + * Pseudo action: grp-clone_start_0 + * Pseudo action: grp:0_start_0 + * Resource action: rsc1 start on node1 + * Resource action: rsc2 start on node1 +- * Pseudo action: grp:1_start_0 +- * Resource action: rsc1 start on node1 +- * Resource action: rsc2 start on node1 + * Pseudo action: grp:2_start_0 +- * Resource action: rsc1 start on node1 +- * Resource action: rsc2 start on node1 ++ * Resource action: rsc1 start on node2 ++ * Resource action: rsc2 start on node2 + * Pseudo action: grp:0_running_0 + * Resource action: rsc1 monitor=10000 on node1 + * Resource action: rsc2 monitor=10000 on node1 +- * Pseudo action: grp:1_running_0 +- * Resource action: rsc1 monitor=10000 on node1 +- * Resource action: rsc2 monitor=10000 on node1 + * Pseudo action: grp:2_running_0 +- * Resource action: rsc1 monitor=10000 on node1 +- * Resource action: rsc2 monitor=10000 on node1 ++ * Resource action: rsc1 monitor=10000 on node2 ++ * Resource action: rsc2 monitor=10000 on node2 + * Pseudo action: grp-clone_running_0 + + Revised Cluster Status: +@@ -55,5 +43,4 @@ Revised Cluster Status: + * Full List of Resources: + * Fencing (stonith:fence_xvm): Started node2 + * Clone Set: grp-clone [grp]: +- * Started: [ node1 ] +- * Stopped: [ node2 node3 ] ++ * Started: [ node1 node2 node3 ] +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-5.xml b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +index 67176dc1a03..45f3b5a9f3a 100644 +--- a/cts/scheduler/xml/clone-recover-no-shuffle-5.xml ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +@@ -14,9 +14,9 @@ + * Instance grp:2 should start on node1 + + This test output is incorrect: +- * Instance grp:0 moves from node2 to node1 +- * Instance grp:1 moves from node3 to node1 +- * Instance grp:2 
starts on node1 (correct) ++ * Instance grp:0 moves to node1 ++ * Instance grp:1 remains started on node3 (correct) ++ * Instance grp:2 starts on node2 + --> + + + +From ff60c47e89c6434819dbe5e5e9a87d01122e165e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 6 Jul 2023 13:52:59 -0700 +Subject: [PATCH 12/19] Refactor: libpacemaker: Move instance provisional check + to loop body + +Avoid calling preferred_node() this way. Since assign_instance() is +static and has only two callers, we don't have to worry about a sanity +provisional check inside the function. + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_instances.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index 783820bbf69..58fad741729 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -568,11 +568,6 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id, + ((prefer == NULL)? 
"no node" : prefer->details->uname)); + +- if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { +- // Instance is already assigned +- return instance->fns->location(instance, NULL, FALSE) != NULL; +- } +- + if (pcmk_is_set(instance->flags, pe_rsc_allocating)) { + pe_rsc_debug(instance, + "Assignment loop detected involving %s colocations", +@@ -745,6 +740,10 @@ pcmk__assign_instances(pe_resource_t *collective, GList *instances, + iter = iter->next) { + instance = (pe_resource_t *) iter->data; + ++ if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { ++ continue; // Already assigned ++ } ++ + current = preferred_node(collective, instance, optimal_per_node); + if ((current != NULL) + && assign_instance(instance, current, max_per_node)) { + +From 0f9e84238a4778da71488ff67ea9f1772e797d80 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 23 Jun 2023 15:16:57 -0700 +Subject: [PATCH 13/19] Refactor: libpacemaker: Functionize updating parent + allowed node count + +...in pcmk_sched_instances.c:assign_instance(). We'll use this elsewhere +in an upcoming commit. + +Ref T489 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_instances.c | 54 ++++++++++++++++++---------- + 1 file changed, 36 insertions(+), 18 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index 58fad741729..1b051cb2ed9 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -545,6 +545,39 @@ pcmk__cmp_instance(gconstpointer a, gconstpointer b) + return rc; + } + ++/*! ++ * \internal ++ * \brief Increment the parent's instance count after assigning an instance ++ * ++ * An instance's parent tracks how many instances have been assigned to each ++ * node via its pe_node_t:count member. After assigning an instance to a node, ++ * find the corresponding node in the parent's allowed table and increment it. 
++ * ++ * \param[in,out] instance Instance whose parent to update ++ * \param[in] assigned_to Node to which the instance was assigned ++ */ ++static void ++increment_parent_count(pe_resource_t *instance, const pe_node_t *assigned_to) ++{ ++ pe_node_t *allowed = NULL; ++ ++ if (assigned_to == NULL) { ++ return; ++ } ++ allowed = pcmk__top_allowed_node(instance, assigned_to); ++ ++ if (allowed == NULL) { ++ /* The instance is allowed on the node, but its parent isn't. This ++ * shouldn't be possible if the resource is managed, and we won't be ++ * able to limit the number of instances assigned to the node. ++ */ ++ CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); ++ ++ } else { ++ allowed->count++; ++ } ++} ++ + /*! + * \internal + * \brief Choose a node for an instance +@@ -562,9 +595,7 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + int max_per_node) + { + pe_node_t *chosen = NULL; +- pe_node_t *allowed = NULL; + +- CRM_ASSERT(instance != NULL); + pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id, + ((prefer == NULL)? "no node" : prefer->details->uname)); + +@@ -578,8 +609,8 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + if (prefer != NULL) { // Possible early assignment to preferred node + + // Get preferred node with instance's scores +- allowed = g_hash_table_lookup(instance->allowed_nodes, +- prefer->details->id); ++ pe_node_t *allowed = g_hash_table_lookup(instance->allowed_nodes, ++ prefer->details->id); + + if ((allowed == NULL) || (allowed->weight < 0)) { + pe_rsc_trace(instance, +@@ -612,20 +643,7 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + g_hash_table_destroy(backup); + } + +- // The parent tracks how many instances have been assigned to each node +- if (chosen != NULL) { +- allowed = pcmk__top_allowed_node(instance, chosen); +- if (allowed == NULL) { +- /* The instance is allowed on the node, but its parent isn't. 
This +- * shouldn't be possible if the resource is managed, and we won't be +- * able to limit the number of instances assigned to the node. +- */ +- CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pe_rsc_managed)); +- +- } else { +- allowed->count++; +- } +- } ++ increment_parent_count(instance, chosen); + return chosen != NULL; + } + + +From 6cddfe269531661112537eb3ef7c90975feb73ea Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 22 Jun 2023 13:49:42 -0700 +Subject: [PATCH 14/19] Refactor: libpe_status: Copy count in pe__copy_node() + +pe__copy_node() is supposed to make a shallow copy of a pe_node_t +object. That should include the count member. The caller is free to +reset it to 0 if desired. + +Signed-off-by: Reid Wahl +--- + lib/pengine/utils.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c +index ef0a092dc16..199ce87e61f 100644 +--- a/lib/pengine/utils.c ++++ b/lib/pengine/utils.c +@@ -98,6 +98,7 @@ pe__copy_node(const pe_node_t *this_node) + new_node->rsc_discover_mode = this_node->rsc_discover_mode; + new_node->weight = this_node->weight; + new_node->fixed = this_node->fixed; // @COMPAT deprecated and unused ++ new_node->count = this_node->count; + new_node->details = this_node->details; + + return new_node; + +From 30385bedeb5177b703b3b68d9579d55356187f26 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 23 Jun 2023 15:29:17 -0700 +Subject: [PATCH 15/19] Refactor: libpacemaker: Return chosen node from + assign_instance() + +The return type was changed to bool by commit 97f67da8. However, an +upcoming commit will need the assigned-to node. 
+ +Ref T489 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_instances.c | 28 ++++++++++++++++------------ + 1 file changed, 16 insertions(+), 12 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index 1b051cb2ed9..64c027b20b1 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -580,7 +580,7 @@ increment_parent_count(pe_resource_t *instance, const pe_node_t *assigned_to) + + /*! + * \internal +- * \brief Choose a node for an instance ++ * \brief Assign an instance to a node + * + * \param[in,out] instance Clone instance or bundle replica container + * \param[in] prefer If not NULL, attempt early assignment to this +@@ -588,9 +588,9 @@ increment_parent_count(pe_resource_t *instance, const pe_node_t *assigned_to) + * perform final assignment + * \param[in] max_per_node Assign at most this many instances to one node + * +- * \return true if \p instance could be assigned to a node, otherwise false ++ * \return Node to which \p instance is assigned + */ +-static bool ++static const pe_node_t * + assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + int max_per_node) + { +@@ -603,7 +603,7 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + pe_rsc_debug(instance, + "Assignment loop detected involving %s colocations", + instance->id); +- return false; ++ return NULL; + } + + if (prefer != NULL) { // Possible early assignment to preferred node +@@ -616,7 +616,7 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + pe_rsc_trace(instance, + "Not assigning %s to preferred node %s: unavailable", + instance->id, pe__node_name(prefer)); +- return false; ++ return NULL; + } + } + +@@ -644,7 +644,7 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + } + + increment_parent_count(instance, chosen); +- return chosen != NULL; ++ return chosen; + } + + /*! 
+@@ -763,11 +763,15 @@ pcmk__assign_instances(pe_resource_t *collective, GList *instances, + } + + current = preferred_node(collective, instance, optimal_per_node); +- if ((current != NULL) +- && assign_instance(instance, current, max_per_node)) { +- pe_rsc_trace(collective, "Assigned %s to current node %s", +- instance->id, pe__node_name(current)); +- assigned++; ++ if (current != NULL) { ++ const pe_node_t *chosen = assign_instance(instance, current, ++ max_per_node); ++ ++ if (pe__same_node(chosen, current)) { ++ pe_rsc_trace(collective, "Assigned %s to current node %s", ++ instance->id, pe__node_name(current)); ++ assigned++; ++ } + } + } + +@@ -802,7 +806,7 @@ pcmk__assign_instances(pe_resource_t *collective, GList *instances, + resource_location(instance, NULL, -INFINITY, + "collective_limit_reached", collective->cluster); + +- } else if (assign_instance(instance, NULL, max_per_node)) { ++ } else if (assign_instance(instance, NULL, max_per_node) != NULL) { + assigned++; + } + } + +From 010649ef135ee0d4aca916d2d61c79bcba446951 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 23 Jun 2023 21:30:47 -0700 +Subject: [PATCH 16/19] Refactor: libpacemaker: New stop_if_fail argument for + assign() method + +...of resource_alloc_functions_t. This will allow us to do a fully +reversible assignment. + +Currently pcmk__unassign_resource() undoes everything assignment-related +but can't undo changes to roles and actions. + +Now, if stop_if_fail is true, the assign() method and +pcmk__assign_resource() behave as before. + +If stop_if_fail is false and assignment succeeds, we can safely either +consider the assignment final or revert it via +pcmk__unassign_resource(). If assignment fails, the effect is as if we +had called pcmk__unassign_resource(); there are no side effects on next +role or actions. 
+ +Ref T489 + +Signed-off-by: Reid Wahl +--- + include/pcmki/pcmki_sched_allocate.h | 3 +- + lib/pacemaker/libpacemaker_private.h | 30 ++++++++++++---- + lib/pacemaker/pcmk_sched_bundle.c | 30 +++++++++++----- + lib/pacemaker/pcmk_sched_clone.c | 22 +++++++++--- + lib/pacemaker/pcmk_sched_group.c | 18 +++++++--- + lib/pacemaker/pcmk_sched_instances.c | 24 +++++++------ + lib/pacemaker/pcmk_sched_primitive.c | 52 ++++++++++++++++++++-------- + lib/pacemaker/pcmk_sched_resource.c | 41 ++++++++++++++++------ + lib/pacemaker/pcmk_scheduler.c | 4 +-- + 9 files changed, 163 insertions(+), 61 deletions(-) + +diff --git a/include/pcmki/pcmki_sched_allocate.h b/include/pcmki/pcmki_sched_allocate.h +index 32044ea96d4..f027d1211f0 100644 +--- a/include/pcmki/pcmki_sched_allocate.h ++++ b/include/pcmki/pcmki_sched_allocate.h +@@ -19,7 +19,8 @@ + # include + # include + +-pe_node_t *pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer); ++pe_node_t *pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail); + void pcmk__bundle_create_actions(pe_resource_t *rsc); + bool pcmk__bundle_create_probe(pe_resource_t *rsc, pe_node_t *node); + void pcmk__bundle_internal_constraints(pe_resource_t *rsc); +diff --git a/lib/pacemaker/libpacemaker_private.h b/lib/pacemaker/libpacemaker_private.h +index 8cdd13f7304..642176aafcd 100644 +--- a/lib/pacemaker/libpacemaker_private.h ++++ b/lib/pacemaker/libpacemaker_private.h +@@ -58,12 +58,24 @@ struct resource_alloc_functions_s { + * \internal + * \brief Assign a resource to a node + * +- * \param[in,out] rsc Resource to assign to a node +- * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in,out] rsc Resource to assign to a node ++ * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a ++ * node, set next role to stopped and update ++ * existing actions (if \p rsc is not a ++ * primitive, this applies to its 
primitive ++ * descendants instead) + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() ++ * can completely undo the assignment. A successful assignment can be ++ * either undone or left alone as final. A failed assignment has the ++ * same effect as calling pcmk__unassign_resource(); there are no side ++ * effects on roles or actions. + */ +- pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer); ++ pe_node_t *(*assign)(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail); + + /*! + * \internal +@@ -649,7 +661,8 @@ void pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action); + // Primitives (pcmk_sched_primitive.c) + + G_GNUC_INTERNAL +-pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer); ++pe_node_t *pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail); + + G_GNUC_INTERNAL + void pcmk__primitive_create_actions(pe_resource_t *rsc); +@@ -696,7 +709,8 @@ void pcmk__primitive_shutdown_lock(pe_resource_t *rsc); + // Groups (pcmk_sched_group.c) + + G_GNUC_INTERNAL +-pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer); ++pe_node_t *pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail); + + G_GNUC_INTERNAL + void pcmk__group_create_actions(pe_resource_t *rsc); +@@ -756,7 +770,8 @@ void pcmk__group_shutdown_lock(pe_resource_t *rsc); + // Clones (pcmk_sched_clone.c) + + G_GNUC_INTERNAL +-pe_node_t *pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer); ++pe_node_t *pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail); + + G_GNUC_INTERNAL + void pcmk__clone_apply_coloc_score(pe_resource_t *dependent, +@@ -915,7 +930,8 @@ G_GNUC_INTERNAL + void pcmk__output_resource_actions(pe_resource_t *rsc); + + G_GNUC_INTERNAL +-bool pcmk__assign_resource(pe_resource_t 
*rsc, pe_node_t *node, bool force); ++bool pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force, ++ bool stop_if_fail); + + G_GNUC_INTERNAL + void pcmk__unassign_resource(pe_resource_t *rsc); +diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c +index 5682744395a..05a8626c889 100644 +--- a/lib/pacemaker/pcmk_sched_bundle.c ++++ b/lib/pacemaker/pcmk_sched_bundle.c +@@ -36,13 +36,24 @@ is_bundle_node(pe__bundle_variant_data_t *data, pe_node_t *node) + * \internal + * \brief Assign a bundle resource to a node + * +- * \param[in,out] rsc Resource to assign to a node +- * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in,out] rsc Resource to assign to a node ++ * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc ++ * can't be assigned to a node, set the ++ * descendant's next role to stopped and update ++ * existing actions + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. 
+ */ + pe_node_t * +-pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) ++pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail) + { + GList *containers = NULL; + pe__bundle_variant_data_t *bundle_data = NULL; +@@ -71,7 +82,7 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + if (replica->ip) { + pe_rsc_trace(rsc, "Allocating bundle %s IP %s", + rsc->id, replica->ip->id); +- replica->ip->cmds->assign(replica->ip, prefer); ++ replica->ip->cmds->assign(replica->ip, prefer, stop_if_fail); + } + + container_host = replica->container->allocated_to; +@@ -89,7 +100,8 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + if (replica->remote) { + pe_rsc_trace(rsc, "Allocating bundle %s connection %s", + rsc->id, replica->remote->id); +- replica->remote->cmds->assign(replica->remote, prefer); ++ replica->remote->cmds->assign(replica->remote, prefer, ++ stop_if_fail); + } + + // Explicitly allocate replicas' children before bundle child +@@ -110,7 +122,8 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + pe__set_resource_flags(replica->child->parent, pe_rsc_allocating); + pe_rsc_trace(rsc, "Allocating bundle %s replica child %s", + rsc->id, replica->child->id); +- replica->child->cmds->assign(replica->child, replica->node); ++ replica->child->cmds->assign(replica->child, replica->node, ++ stop_if_fail); + pe__clear_resource_flags(replica->child->parent, + pe_rsc_allocating); + } +@@ -129,7 +142,8 @@ pcmk__bundle_allocate(pe_resource_t *rsc, const pe_node_t *prefer) + } + pe_rsc_trace(rsc, "Allocating bundle %s child %s", + rsc->id, bundle_data->child->id); +- bundle_data->child->cmds->assign(bundle_data->child, prefer); ++ bundle_data->child->cmds->assign(bundle_data->child, prefer, ++ stop_if_fail); + } + + pe__clear_resource_flags(rsc, pe_rsc_allocating|pe_rsc_provisional); +@@ -457,7 +471,7 @@ pcmk__bundle_apply_coloc_score(pe_resource_t *dependent, + } else if 
(colocation->score >= INFINITY) { + crm_notice("Cannot pair %s with instance of %s", + dependent->id, primary->id); +- pcmk__assign_resource(dependent, NULL, true); ++ pcmk__assign_resource(dependent, NULL, true, true); + + } else { + pe_rsc_debug(primary, "Cannot pair %s with instance of %s", +diff --git a/lib/pacemaker/pcmk_sched_clone.c b/lib/pacemaker/pcmk_sched_clone.c +index 934f512d549..229257fd2be 100644 +--- a/lib/pacemaker/pcmk_sched_clone.c ++++ b/lib/pacemaker/pcmk_sched_clone.c +@@ -18,13 +18,24 @@ + * \internal + * \brief Assign a clone resource's instances to nodes + * +- * \param[in,out] rsc Clone resource to assign +- * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in,out] rsc Clone resource to assign ++ * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in] stop_if_fail If \c true and a primitive descendant of \p rsc ++ * can't be assigned to a node, set the ++ * descendant's next role to stopped and update ++ * existing actions + * + * \return NULL (clones are not assigned to a single node) ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. 
+ */ + pe_node_t * +-pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer) ++pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail) + { + CRM_ASSERT(pe_rsc_is_clone(rsc)); + +@@ -53,7 +64,8 @@ pcmk__clone_assign(pe_resource_t *rsc, const pe_node_t *prefer) + + pe_rsc_trace(rsc, "%s: Assigning colocation %s primary %s first", + rsc->id, constraint->id, constraint->primary->id); +- constraint->primary->cmds->assign(constraint->primary, prefer); ++ constraint->primary->cmds->assign(constraint->primary, prefer, ++ stop_if_fail); + } + + /* If any resources are colocated with this one, consider their preferences. +@@ -305,7 +317,7 @@ pcmk__clone_apply_coloc_score(pe_resource_t *dependent, + } else if (colocation->score >= INFINITY) { + crm_notice("Cannot pair %s with instance of %s", + dependent->id, primary->id); +- pcmk__assign_resource(dependent, NULL, true); ++ pcmk__assign_resource(dependent, NULL, true, true); + + } else { + pe_rsc_debug(primary, "Cannot pair %s with instance of %s", +diff --git a/lib/pacemaker/pcmk_sched_group.c b/lib/pacemaker/pcmk_sched_group.c +index cb139f7ddf9..55d890a5c4f 100644 +--- a/lib/pacemaker/pcmk_sched_group.c ++++ b/lib/pacemaker/pcmk_sched_group.c +@@ -20,13 +20,23 @@ + * \internal + * \brief Assign a group resource to a node + * +- * \param[in,out] rsc Group resource to assign to a node +- * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in,out] rsc Group resource to assign to a node ++ * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in] stop_if_fail If \c true and a child of \p rsc can't be ++ * assigned to a node, set the child's next role to ++ * stopped and update existing actions + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. 
A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. + */ + pe_node_t * +-pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer) ++pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail) + { + pe_node_t *first_assigned_node = NULL; + pe_resource_t *first_member = NULL; +@@ -61,7 +71,7 @@ pcmk__group_assign(pe_resource_t *rsc, const pe_node_t *prefer) + + pe_rsc_trace(rsc, "Assigning group %s member %s", + rsc->id, member->id); +- node = member->cmds->assign(member, prefer); ++ node = member->cmds->assign(member, prefer, stop_if_fail); + if (first_assigned_node == NULL) { + first_assigned_node = node; + } +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index 64c027b20b1..b551f3bee61 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -623,22 +623,26 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + ban_unavailable_allowed_nodes(instance, max_per_node); + + if (prefer == NULL) { // Final assignment +- chosen = instance->cmds->assign(instance, NULL); ++ chosen = instance->cmds->assign(instance, NULL, true); + + } else { // Possible early assignment to preferred node + GHashTable *backup = NULL; + + pcmk__copy_node_tables(instance, &backup); +- chosen = instance->cmds->assign(instance, prefer); +- +- // Revert nodes if preferred node won't be assigned +- if ((chosen != NULL) && (chosen->details != prefer->details)) { +- crm_info("Not assigning %s to preferred node %s: %s is better", +- instance->id, pe__node_name(prefer), +- pe__node_name(chosen)); ++ chosen = instance->cmds->assign(instance, prefer, false); ++ ++ if (!pe__same_node(chosen, prefer)) { ++ // Revert nodes if preferred node won't be assigned ++ if (chosen != NULL) { ++ pe_rsc_info(instance, ++ "Not 
assigning %s to preferred node %s: " ++ "%s is better", ++ instance->id, pe__node_name(prefer), ++ pe__node_name(chosen)); ++ chosen = NULL; ++ } + pcmk__restore_node_tables(instance, backup); + pcmk__unassign_resource(instance); +- chosen = NULL; + } + g_hash_table_destroy(backup); + } +@@ -1181,7 +1185,7 @@ unassign_if_mandatory(const pe_action_t *first, const pe_action_t *then, + "Inhibiting %s from being active " + "because there is no %s instance to interleave", + then_instance->id, first->rsc->id); +- return pcmk__assign_resource(then_instance, NULL, true); ++ return pcmk__assign_resource(then_instance, NULL, true, true); + } + return false; + } +diff --git a/lib/pacemaker/pcmk_sched_primitive.c b/lib/pacemaker/pcmk_sched_primitive.c +index 2470b08ed69..50f11138f23 100644 +--- a/lib/pacemaker/pcmk_sched_primitive.c ++++ b/lib/pacemaker/pcmk_sched_primitive.c +@@ -141,13 +141,23 @@ sorted_allowed_nodes(const pe_resource_t *rsc) + * \internal + * \brief Assign a resource to its best allowed node, if possible + * +- * \param[in,out] rsc Resource to choose a node for +- * \param[in] prefer If not NULL, prefer this node when all else equal ++ * \param[in,out] rsc Resource to choose a node for ++ * \param[in] prefer If not \c NULL, prefer this node when all else ++ * equal ++ * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a ++ * node, set next role to stopped and update ++ * existing actions + * + * \return true if \p rsc could be assigned to a node, otherwise false ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. 
+ */ + static bool +-assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer) ++assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer, bool stop_if_fail) + { + GList *nodes = NULL; + pe_node_t *chosen = NULL; +@@ -259,7 +269,7 @@ assign_best_node(pe_resource_t *rsc, const pe_node_t *prefer) + pe__node_name(chosen), rsc->id, g_list_length(nodes)); + } + +- pcmk__assign_resource(rsc, chosen, false); ++ pcmk__assign_resource(rsc, chosen, false, stop_if_fail); + g_list_free(nodes); + return rsc->allocated_to != NULL; + } +@@ -292,7 +302,7 @@ apply_this_with(gpointer data, gpointer user_data) + "(score=%d role=%s)", + rsc->id, colocation->id, other->id, + colocation->score, role2text(colocation->dependent_role)); +- other->cmds->assign(other, NULL); ++ other->cmds->assign(other, NULL, true); + } + + // Apply the colocation score to this resource's allowed node scores +@@ -351,13 +361,23 @@ remote_connection_assigned(const pe_resource_t *connection) + * \internal + * \brief Assign a primitive resource to a node + * +- * \param[in,out] rsc Resource to assign to a node +- * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in,out] rsc Resource to assign to a node ++ * \param[in] prefer Node to prefer, if all else is equal ++ * \param[in] stop_if_fail If \c true and \p rsc can't be assigned to a ++ * node, set next role to stopped and update ++ * existing actions + * + * \return Node that \p rsc is assigned to, if assigned entirely to one node ++ * ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. 
+ */ + pe_node_t * +-pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer) ++pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer, ++ bool stop_if_fail) + { + GList *this_with_colocations = NULL; + GList *with_this_colocations = NULL; +@@ -371,7 +391,7 @@ pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer) + && !pcmk_is_set(rsc->parent->flags, pe_rsc_allocating)) { + pe_rsc_debug(rsc, "%s: Assigning parent %s first", + rsc->id, rsc->parent->id); +- rsc->parent->cmds->assign(rsc->parent, prefer); ++ rsc->parent->cmds->assign(rsc->parent, prefer, stop_if_fail); + } + + if (!pcmk_is_set(rsc->flags, pe_rsc_provisional)) { +@@ -474,20 +494,24 @@ pcmk__primitive_assign(pe_resource_t *rsc, const pe_node_t *prefer) + } + pe_rsc_info(rsc, "Unmanaged resource %s assigned to %s: %s", rsc->id, + (assign_to? assign_to->details->uname : "no node"), reason); +- pcmk__assign_resource(rsc, assign_to, true); ++ pcmk__assign_resource(rsc, assign_to, true, stop_if_fail); + + } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stop_everything)) { +- pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", rsc->id); +- pcmk__assign_resource(rsc, NULL, true); ++ // Must stop at some point, but be consistent with stop_if_fail ++ if (stop_if_fail) { ++ pe_rsc_debug(rsc, "Forcing %s to stop: stop-all-resources", ++ rsc->id); ++ } ++ pcmk__assign_resource(rsc, NULL, true, stop_if_fail); + + } else if (pcmk_is_set(rsc->flags, pe_rsc_provisional) +- && assign_best_node(rsc, prefer)) { ++ && assign_best_node(rsc, prefer, stop_if_fail)) { + // Assignment successful + + } else if (rsc->allocated_to == NULL) { + if (!pcmk_is_set(rsc->flags, pe_rsc_orphan)) { + pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); +- } else if (rsc->running_on != NULL) { ++ } else if ((rsc->running_on != NULL) && stop_if_fail) { + pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); + } + +diff --git a/lib/pacemaker/pcmk_sched_resource.c 
b/lib/pacemaker/pcmk_sched_resource.c +index 8f703789b20..36f49dc49b9 100644 +--- a/lib/pacemaker/pcmk_sched_resource.c ++++ b/lib/pacemaker/pcmk_sched_resource.c +@@ -335,25 +335,38 @@ pcmk__output_resource_actions(pe_resource_t *rsc) + * + * Assign a specified resource and its children (if any) to a specified node, if + * the node can run the resource (or unconditionally, if \p force is true). Mark +- * the resources as no longer provisional. If a resource can't be assigned (or +- * \p node is \c NULL), unassign any previous assignment, set next role to +- * stopped, and update any existing actions scheduled for it. ++ * the resources as no longer provisional. + * +- * \param[in,out] rsc Resource to assign +- * \param[in,out] node Node to assign \p rsc to +- * \param[in] force If true, assign to \p node even if unavailable ++ * If a resource can't be assigned (or \p node is \c NULL), unassign any ++ * previous assignment. If \p stop_if_fail is \c true, set next role to stopped ++ * and update any existing actions scheduled for the resource. ++ * ++ * \param[in,out] rsc Resource to assign ++ * \param[in,out] node Node to assign \p rsc to ++ * \param[in] force If true, assign to \p node even if unavailable ++ * \param[in] stop_if_fail If \c true and either \p rsc can't be assigned ++ * or \p node is \c NULL, set next role to ++ * stopped and update existing actions (if \p rsc ++ * is not a primitive, this applies to its ++ * primitive descendants instead) + * + * \return \c true if the assignment of \p rsc changed, or \c false otherwise + * + * \note Assigning a resource to the NULL node using this function is different +- * from calling pcmk__unassign_resource(), in that it will also update any ++ * from calling pcmk__unassign_resource(), in that it may also update any + * actions created for the resource.
+ * \note The \c resource_alloc_functions_t:assign() method is preferred, unless + * a resource should be assigned to the \c NULL node or every resource in + * a tree should be assigned to the same node. ++ * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can ++ * completely undo the assignment. A successful assignment can be either ++ * undone or left alone as final. A failed assignment has the same effect ++ * as calling pcmk__unassign_resource(); there are no side effects on ++ * roles or actions. + */ + bool +-pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) ++pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force, ++ bool stop_if_fail) + { + bool changed = false; + +@@ -363,7 +376,8 @@ pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) + for (GList *iter = rsc->children; iter != NULL; iter = iter->next) { + pe_resource_t *child_rsc = iter->data; + +- changed |= pcmk__assign_resource(child_rsc, node, force); ++ changed |= pcmk__assign_resource(child_rsc, node, force, ++ stop_if_fail); + } + return changed; + } +@@ -382,7 +396,10 @@ pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) + rsc->id, pe__node_name(node), + (pcmk__node_available(node, true, false)? 
"" : "not"), + pcmk_readable_score(node->weight)); +- pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); ++ ++ if (stop_if_fail) { ++ pe__set_next_role(rsc, RSC_ROLE_STOPPED, "node availability"); ++ } + node = NULL; + } + +@@ -398,6 +415,10 @@ pcmk__assign_resource(pe_resource_t *rsc, pe_node_t *node, bool force) + char *rc_stopped = NULL; + + pe_rsc_debug(rsc, "Could not assign %s to a node", rsc->id); ++ ++ if (!stop_if_fail) { ++ return changed; ++ } + pe__set_next_role(rsc, RSC_ROLE_STOPPED, "unable to assign"); + + for (GList *iter = rsc->actions; iter != NULL; iter = iter->next) { +diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c +index b4e670d865c..508cd5721c4 100644 +--- a/lib/pacemaker/pcmk_scheduler.c ++++ b/lib/pacemaker/pcmk_scheduler.c +@@ -318,7 +318,7 @@ allocate_resources(pe_working_set_t *data_set) + if (rsc->is_remote_node) { + pe_rsc_trace(rsc, "Allocating remote connection resource '%s'", + rsc->id); +- rsc->cmds->assign(rsc, rsc->partial_migration_target); ++ rsc->cmds->assign(rsc, rsc->partial_migration_target, true); + } + } + } +@@ -330,7 +330,7 @@ allocate_resources(pe_working_set_t *data_set) + if (!rsc->is_remote_node) { + pe_rsc_trace(rsc, "Allocating %s resource '%s'", + crm_element_name(rsc->xml), rsc->id); +- rsc->cmds->assign(rsc, NULL); ++ rsc->cmds->assign(rsc, NULL, true); + } + } + + +From a698dd1e17f184977f87c4ef44c2eb5b9bd933f6 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Jul 2023 02:44:46 -0700 +Subject: [PATCH 17/19] Test: scheduler: Update tests after new stop_if_fail + argument + +Some scores are repeated since we're able to back out of a failed early +assignment now. + +Only one test changes otherwise. bug-1822 has a score change from +-INFINITY to 49. However, the partially active group is still not +allowed to promote, which is the purpose of the test. 
+ +Ref T489 + +Signed-off-by: Reid Wahl +--- + cts/scheduler/scores/594.scores | 3 +++ + cts/scheduler/scores/bug-1822.scores | 2 +- + .../bug-5014-CLONE-A-stop-B-started.scores | 1 + + cts/scheduler/scores/bug-lf-2171.scores | 4 ++++ + cts/scheduler/scores/bug-lf-2422.scores | 16 ++++++++++++++++ + cts/scheduler/scores/bug-lf-2453.scores | 4 ++++ + cts/scheduler/scores/bug-lf-2574.scores | 3 +++ + .../scores/bundle-order-stop-clone.scores | 4 ++++ + cts/scheduler/scores/clone-max-zero.scores | 8 ++++++++ + cts/scheduler/scores/cloned-group-stop.scores | 4 ++++ + cts/scheduler/scores/complex_enforce_colo.scores | 9 +++++++++ + cts/scheduler/scores/enforce-colo1.scores | 9 +++++++++ + .../scores/promoted-asymmetrical-order.scores | 4 ++++ + .../scores/promoted-failed-demote-2.scores | 10 ++++++++++ + .../scores/promoted-failed-demote.scores | 10 ++++++++++ + 15 files changed, 90 insertions(+), 1 deletion(-) + +diff --git a/cts/scheduler/scores/594.scores b/cts/scheduler/scores/594.scores +index 5e99750df21..96c8f441b98 100644 +--- a/cts/scheduler/scores/594.scores ++++ b/cts/scheduler/scores/594.scores +@@ -21,8 +21,11 @@ pcmk__primitive_assign: child_DoFencing:1 allocation score on hadev1: 1 + pcmk__primitive_assign: child_DoFencing:1 allocation score on hadev2: -INFINITY + pcmk__primitive_assign: child_DoFencing:1 allocation score on hadev3: -INFINITY + pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev1: -INFINITY ++pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev1: -INFINITY ++pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev2: -INFINITY + pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev2: -INFINITY + pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev3: -INFINITY ++pcmk__primitive_assign: child_DoFencing:2 allocation score on hadev3: -INFINITY + pcmk__primitive_assign: rsc_hadev1 allocation score on hadev1: 100 + pcmk__primitive_assign: rsc_hadev1 allocation score on 
hadev2: 0 + pcmk__primitive_assign: rsc_hadev1 allocation score on hadev3: 0 +diff --git a/cts/scheduler/scores/bug-1822.scores b/cts/scheduler/scores/bug-1822.scores +index 82191d1e74b..0a9056bbf3e 100644 +--- a/cts/scheduler/scores/bug-1822.scores ++++ b/cts/scheduler/scores/bug-1822.scores +@@ -1,5 +1,5 @@ + +-ms-sf_group:0 promotion score on process2b: -INFINITY ++ms-sf_group:0 promotion score on process2b: 49 + ms-sf_group:1 promotion score on none: 0 + pcmk__clone_assign: ms-sf allocation score on process1a: 0 + pcmk__clone_assign: ms-sf allocation score on process2b: 0 +diff --git a/cts/scheduler/scores/bug-5014-CLONE-A-stop-B-started.scores b/cts/scheduler/scores/bug-5014-CLONE-A-stop-B-started.scores +index e698b145274..d79208c7336 100644 +--- a/cts/scheduler/scores/bug-5014-CLONE-A-stop-B-started.scores ++++ b/cts/scheduler/scores/bug-5014-CLONE-A-stop-B-started.scores +@@ -5,3 +5,4 @@ pcmk__clone_assign: clone1 allocation score on fc16-builder: 0 + pcmk__clone_assign: clone2 allocation score on fc16-builder: 0 + pcmk__primitive_assign: ClusterIP2:0 allocation score on fc16-builder: 1 + pcmk__primitive_assign: ClusterIP:0 allocation score on fc16-builder: -INFINITY ++pcmk__primitive_assign: ClusterIP:0 allocation score on fc16-builder: -INFINITY +diff --git a/cts/scheduler/scores/bug-lf-2171.scores b/cts/scheduler/scores/bug-lf-2171.scores +index 7d2bdd45307..14cc28a88c5 100644 +--- a/cts/scheduler/scores/bug-lf-2171.scores ++++ b/cts/scheduler/scores/bug-lf-2171.scores +@@ -12,8 +12,12 @@ pcmk__group_assign: res_Dummy2 allocation score on xenserver2: 0 + pcmk__group_assign: res_Dummy3 allocation score on xenserver1: 200 + pcmk__group_assign: res_Dummy3 allocation score on xenserver2: 0 + pcmk__primitive_assign: res_Dummy1:0 allocation score on xenserver1: -INFINITY ++pcmk__primitive_assign: res_Dummy1:0 allocation score on xenserver1: -INFINITY ++pcmk__primitive_assign: res_Dummy1:0 allocation score on xenserver2: -INFINITY + pcmk__primitive_assign: 
res_Dummy1:0 allocation score on xenserver2: -INFINITY + pcmk__primitive_assign: res_Dummy1:1 allocation score on xenserver1: -INFINITY ++pcmk__primitive_assign: res_Dummy1:1 allocation score on xenserver1: -INFINITY ++pcmk__primitive_assign: res_Dummy1:1 allocation score on xenserver2: -INFINITY + pcmk__primitive_assign: res_Dummy1:1 allocation score on xenserver2: -INFINITY + pcmk__primitive_assign: res_Dummy2 allocation score on xenserver1: 200 + pcmk__primitive_assign: res_Dummy2 allocation score on xenserver2: 0 +diff --git a/cts/scheduler/scores/bug-lf-2422.scores b/cts/scheduler/scores/bug-lf-2422.scores +index 99ff12e3bb6..77a284da9ce 100644 +--- a/cts/scheduler/scores/bug-lf-2422.scores ++++ b/cts/scheduler/scores/bug-lf-2422.scores +@@ -248,20 +248,36 @@ pcmk__primitive_assign: o2cb:3 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: o2cb:3 allocation score on qa-suse-3: -INFINITY + pcmk__primitive_assign: o2cb:3 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:0 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:1 
allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:1 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:2 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-1: -INFINITY ++pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-2: -INFINITY + pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-3: -INFINITY ++pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: ocfs:3 allocation score on qa-suse-4: -INFINITY + pcmk__primitive_assign: sbd_stonith allocation score on qa-suse-1: 0 + pcmk__primitive_assign: sbd_stonith allocation score on qa-suse-2: 0 +diff --git a/cts/scheduler/scores/bug-lf-2453.scores b/cts/scheduler/scores/bug-lf-2453.scores +index eaee72d2002..3ef0f6dc375 100644 +--- a/cts/scheduler/scores/bug-lf-2453.scores ++++ b/cts/scheduler/scores/bug-lf-2453.scores +@@ -17,6 +17,10 @@ pcmk__primitive_assign: DummyResource:1 allocation score on domu1: -INFINITY + pcmk__primitive_assign: DummyResource:1 allocation score on domu2: INFINITY + pcmk__primitive_assign: PrimitiveResource1 allocation score on domu1: INFINITY 
+ pcmk__primitive_assign: apache:0 allocation score on domu1: -INFINITY ++pcmk__primitive_assign: apache:0 allocation score on domu1: -INFINITY ++pcmk__primitive_assign: apache:0 allocation score on domu2: -INFINITY + pcmk__primitive_assign: apache:0 allocation score on domu2: -INFINITY + pcmk__primitive_assign: apache:1 allocation score on domu1: -INFINITY ++pcmk__primitive_assign: apache:1 allocation score on domu1: -INFINITY ++pcmk__primitive_assign: apache:1 allocation score on domu2: -INFINITY + pcmk__primitive_assign: apache:1 allocation score on domu2: -INFINITY +diff --git a/cts/scheduler/scores/bug-lf-2574.scores b/cts/scheduler/scores/bug-lf-2574.scores +index 0f5cf60a7e0..b4a1bd95841 100644 +--- a/cts/scheduler/scores/bug-lf-2574.scores ++++ b/cts/scheduler/scores/bug-lf-2574.scores +@@ -39,8 +39,11 @@ pcmk__primitive_assign: prmDummy1:2 allocation score on srv01: -INFINITY + pcmk__primitive_assign: prmDummy1:2 allocation score on srv02: -INFINITY + pcmk__primitive_assign: prmDummy1:2 allocation score on srv03: -INFINITY + pcmk__primitive_assign: prmPingd:0 allocation score on srv01: -INFINITY ++pcmk__primitive_assign: prmPingd:0 allocation score on srv01: -INFINITY ++pcmk__primitive_assign: prmPingd:0 allocation score on srv02: -INFINITY + pcmk__primitive_assign: prmPingd:0 allocation score on srv02: -INFINITY + pcmk__primitive_assign: prmPingd:0 allocation score on srv03: -INFINITY ++pcmk__primitive_assign: prmPingd:0 allocation score on srv03: -INFINITY + pcmk__primitive_assign: prmPingd:1 allocation score on srv01: -INFINITY + pcmk__primitive_assign: prmPingd:1 allocation score on srv02: -INFINITY + pcmk__primitive_assign: prmPingd:1 allocation score on srv03: INFINITY +diff --git a/cts/scheduler/scores/bundle-order-stop-clone.scores b/cts/scheduler/scores/bundle-order-stop-clone.scores +index 707260b80a9..06596e86a24 100644 +--- a/cts/scheduler/scores/bundle-order-stop-clone.scores ++++ b/cts/scheduler/scores/bundle-order-stop-clone.scores +@@ 
-147,8 +147,12 @@ pcmk__primitive_assign: galera-bundle-2 allocation score on metal-2: 0 + pcmk__primitive_assign: galera-bundle-2 allocation score on metal-3: INFINITY + pcmk__primitive_assign: galera-bundle-2 allocation score on rabbitmq-bundle-0: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-1: -INFINITY ++pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-1: -INFINITY ++pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-2: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-2: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-3: -INFINITY ++pcmk__primitive_assign: galera-bundle-docker-0 allocation score on metal-3: -INFINITY ++pcmk__primitive_assign: galera-bundle-docker-0 allocation score on rabbitmq-bundle-0: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-0 allocation score on rabbitmq-bundle-0: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-1 allocation score on metal-1: -INFINITY + pcmk__primitive_assign: galera-bundle-docker-1 allocation score on metal-2: INFINITY +diff --git a/cts/scheduler/scores/clone-max-zero.scores b/cts/scheduler/scores/clone-max-zero.scores +index f1711b7885e..bd116a2764c 100644 +--- a/cts/scheduler/scores/clone-max-zero.scores ++++ b/cts/scheduler/scores/clone-max-zero.scores +@@ -26,10 +26,18 @@ pcmk__primitive_assign: drbd0:1 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: fencing allocation score on c001n11: 0 + pcmk__primitive_assign: fencing allocation score on c001n12: 0 + pcmk__primitive_assign: o2cb:0 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: o2cb:0 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: o2cb:0 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: o2cb:0 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: o2cb:1 allocation score on c001n11: -INFINITY 
++pcmk__primitive_assign: o2cb:1 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: o2cb:1 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: o2cb:1 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: ocfs2-1:0 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: ocfs2-1:0 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: ocfs2-1:0 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: ocfs2-1:0 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: ocfs2-1:1 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: ocfs2-1:1 allocation score on c001n11: -INFINITY ++pcmk__primitive_assign: ocfs2-1:1 allocation score on c001n12: -INFINITY + pcmk__primitive_assign: ocfs2-1:1 allocation score on c001n12: -INFINITY +diff --git a/cts/scheduler/scores/cloned-group-stop.scores b/cts/scheduler/scores/cloned-group-stop.scores +index be835fa5371..7e406c6ddc2 100644 +--- a/cts/scheduler/scores/cloned-group-stop.scores ++++ b/cts/scheduler/scores/cloned-group-stop.scores +@@ -122,8 +122,12 @@ pcmk__primitive_assign: mysql-fs allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: mysql-vip allocation score on rhos4-node3: 300 + pcmk__primitive_assign: mysql-vip allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: qpidd:0 allocation score on rhos4-node3: -INFINITY ++pcmk__primitive_assign: qpidd:0 allocation score on rhos4-node3: -INFINITY ++pcmk__primitive_assign: qpidd:0 allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: qpidd:0 allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: qpidd:1 allocation score on rhos4-node3: -INFINITY ++pcmk__primitive_assign: qpidd:1 allocation score on rhos4-node3: -INFINITY ++pcmk__primitive_assign: qpidd:1 allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: qpidd:1 allocation score on rhos4-node4: -INFINITY + pcmk__primitive_assign: virt-fencing allocation score on 
rhos4-node3: 100 + pcmk__primitive_assign: virt-fencing allocation score on rhos4-node4: 0 +diff --git a/cts/scheduler/scores/complex_enforce_colo.scores b/cts/scheduler/scores/complex_enforce_colo.scores +index 9968e1097ef..a5d0b2b4125 100644 +--- a/cts/scheduler/scores/complex_enforce_colo.scores ++++ b/cts/scheduler/scores/complex_enforce_colo.scores +@@ -588,13 +588,22 @@ pcmk__primitive_assign: horizon:2 allocation score on rhos6-node1: -INFINITY + pcmk__primitive_assign: horizon:2 allocation score on rhos6-node2: -INFINITY + pcmk__primitive_assign: horizon:2 allocation score on rhos6-node3: 1 + pcmk__primitive_assign: keystone:0 allocation score on rhos6-node1: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhos6-node1: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhos6-node2: -INFINITY + pcmk__primitive_assign: keystone:0 allocation score on rhos6-node2: -INFINITY + pcmk__primitive_assign: keystone:0 allocation score on rhos6-node3: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhos6-node3: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhos6-node1: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhos6-node1: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhos6-node2: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhos6-node2: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhos6-node3: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhos6-node3: -INFINITY ++pcmk__primitive_assign: keystone:2 allocation score on rhos6-node1: -INFINITY + pcmk__primitive_assign: keystone:2 allocation score on rhos6-node1: -INFINITY + pcmk__primitive_assign: keystone:2 allocation score on rhos6-node2: -INFINITY ++pcmk__primitive_assign: keystone:2 allocation score on rhos6-node2: -INFINITY ++pcmk__primitive_assign: keystone:2 allocation score on rhos6-node3: -INFINITY + pcmk__primitive_assign: 
keystone:2 allocation score on rhos6-node3: -INFINITY + pcmk__primitive_assign: lb-haproxy:0 allocation score on rhos6-node1: 1 + pcmk__primitive_assign: lb-haproxy:0 allocation score on rhos6-node2: 0 +diff --git a/cts/scheduler/scores/enforce-colo1.scores b/cts/scheduler/scores/enforce-colo1.scores +index 8194789648a..262cbd94a30 100644 +--- a/cts/scheduler/scores/enforce-colo1.scores ++++ b/cts/scheduler/scores/enforce-colo1.scores +@@ -18,13 +18,22 @@ pcmk__primitive_assign: engine allocation score on rhel7-auto1: -INFINITY + pcmk__primitive_assign: engine allocation score on rhel7-auto2: -INFINITY + pcmk__primitive_assign: engine allocation score on rhel7-auto3: 0 + pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto1: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto1: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto2: -INFINITY + pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto2: -INFINITY + pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto3: -INFINITY ++pcmk__primitive_assign: keystone:0 allocation score on rhel7-auto3: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto1: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto1: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto2: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto2: -INFINITY + pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto3: -INFINITY ++pcmk__primitive_assign: keystone:1 allocation score on rhel7-auto3: -INFINITY ++pcmk__primitive_assign: keystone:2 allocation score on rhel7-auto1: -INFINITY + pcmk__primitive_assign: keystone:2 allocation score on rhel7-auto1: -INFINITY + pcmk__primitive_assign: keystone:2 allocation score on rhel7-auto2: -INFINITY ++pcmk__primitive_assign: keystone:2 allocation score on rhel7-auto2: -INFINITY ++pcmk__primitive_assign: keystone:2 
allocation score on rhel7-auto3: -INFINITY + pcmk__primitive_assign: keystone:2 allocation score on rhel7-auto3: -INFINITY + pcmk__primitive_assign: shooter allocation score on rhel7-auto1: 0 + pcmk__primitive_assign: shooter allocation score on rhel7-auto2: 0 +diff --git a/cts/scheduler/scores/promoted-asymmetrical-order.scores b/cts/scheduler/scores/promoted-asymmetrical-order.scores +index 382e0ebe285..18bc704551e 100644 +--- a/cts/scheduler/scores/promoted-asymmetrical-order.scores ++++ b/cts/scheduler/scores/promoted-asymmetrical-order.scores +@@ -12,8 +12,12 @@ pcmk__clone_assign: rsc2:0 allocation score on node2: 0 + pcmk__clone_assign: rsc2:1 allocation score on node1: 0 + pcmk__clone_assign: rsc2:1 allocation score on node2: 1 + pcmk__primitive_assign: rsc1:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc1:0 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc1:0 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc1:0 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc1:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc1:1 allocation score on node1: -INFINITY ++pcmk__primitive_assign: rsc1:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc1:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node1: 1 + pcmk__primitive_assign: rsc2:0 allocation score on node2: 0 +diff --git a/cts/scheduler/scores/promoted-failed-demote-2.scores b/cts/scheduler/scores/promoted-failed-demote-2.scores +index 2a85ae6060e..e457d8c6057 100644 +--- a/cts/scheduler/scores/promoted-failed-demote-2.scores ++++ b/cts/scheduler/scores/promoted-failed-demote-2.scores +@@ -16,22 +16,32 @@ pcmk__clone_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__clone_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__clone_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__group_assign: group:0 allocation score on 
dl380g5a: -INFINITY ++pcmk__group_assign: group:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: group:0 allocation score on dl380g5b: 0 + pcmk__group_assign: group:0 allocation score on dl380g5b: 0 + pcmk__group_assign: group:1 allocation score on dl380g5a: 0 + pcmk__group_assign: group:1 allocation score on dl380g5b: 0 + pcmk__group_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__group_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: 
-INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5b: -INFINITY +diff --git a/cts/scheduler/scores/promoted-failed-demote.scores b/cts/scheduler/scores/promoted-failed-demote.scores +index 2a85ae6060e..e457d8c6057 100644 +--- a/cts/scheduler/scores/promoted-failed-demote.scores ++++ b/cts/scheduler/scores/promoted-failed-demote.scores +@@ -16,22 +16,32 @@ pcmk__clone_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__clone_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__clone_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__group_assign: group:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: group:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: group:0 allocation score on dl380g5b: 0 + pcmk__group_assign: group:0 allocation score on dl380g5b: 0 + pcmk__group_assign: group:1 allocation score on dl380g5a: 0 + pcmk__group_assign: group:1 allocation score on dl380g5b: 0 + pcmk__group_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__group_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-1:0 allocation 
score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY ++pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5b: -INFINITY + +From 4abb93e5c779cf058861a25c5eac456ac1087fd6 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 21 Jun 2023 22:40:20 -0700 +Subject: [PATCH 18/19] Fix: libpacemaker: Don't shuffle clone instances + unnecessarily + +Currently, clone instances may be shuffled under certain conditions, +causing an unnecessary resource downtime when an instance is moved +away from its current running node. + +For example, this can happen when a stopped promotable instance is +scheduled to promote and the stickiness is lower than the promotion +score (see the clone-recover-no-shuffle-7 test). Instance 0 gets +assigned first and goes to the node that will be promoted. If instance 0 +is already running on some node, it must stop there before it can start +on the new node. Another instance may start in its place after it stops. + +The fix is to assign an instance to its current node during the early +assignment phase, if that node is going to receive any instance at all. +If the node will receive an instance, it should receive its current +instance. + +The approach is described in detail in comments. 
+ +Previously, if instance 0 was running on node1 and got assigned to node2 +during the early assignment phase (due to node2 having a higher score), +we backed out and immediately gave up on assigning instance 0 early. + +Now, we increment a "number of instances reserved" counter, as well as +the parent's counter of instances assigned to node2. We then try again +to assign instance 0 to node1. If node2 already has the max allowed +number of instances, then it will be marked unavailable for this round. + +Fixes T489 +Fixes RHBZ#1931023 + +Signed-off-by: Reid Wahl +--- + lib/pacemaker/pcmk_sched_instances.c | 163 ++++++++++++++++++++------- + 1 file changed, 122 insertions(+), 41 deletions(-) + +diff --git a/lib/pacemaker/pcmk_sched_instances.c b/lib/pacemaker/pcmk_sched_instances.c +index b551f3bee61..b010d460dbc 100644 +--- a/lib/pacemaker/pcmk_sched_instances.c ++++ b/lib/pacemaker/pcmk_sched_instances.c +@@ -605,50 +605,135 @@ assign_instance(pe_resource_t *instance, const pe_node_t *prefer, + instance->id); + return NULL; + } ++ ban_unavailable_allowed_nodes(instance, max_per_node); ++ ++ // Failed early assignments are reversible (stop_if_fail=false) ++ chosen = instance->cmds->assign(instance, prefer, (prefer == NULL)); ++ increment_parent_count(instance, chosen); ++ return chosen; ++} ++ ++/*! 
++ * \internal ++ * \brief Try to assign an instance to its current node early ++ * ++ * \param[in] rsc Clone or bundle being assigned (for logs only) ++ * \param[in] instance Clone instance or bundle replica container ++ * \param[in] current Instance's current node ++ * \param[in] max_per_node Maximum number of instances per node ++ * \param[in] available Number of instances still available for assignment ++ * ++ * \return \c true if \p instance was successfully assigned to its current node, ++ * or \c false otherwise ++ */ ++static bool ++assign_instance_early(const pe_resource_t *rsc, pe_resource_t *instance, ++ const pe_node_t *current, int max_per_node, int available) ++{ ++ const pe_node_t *chosen = NULL; ++ int reserved = 0; + +- if (prefer != NULL) { // Possible early assignment to preferred node ++ pe_resource_t *parent = instance->parent; ++ GHashTable *allowed_orig = NULL; ++ GHashTable *allowed_orig_parent = parent->allowed_nodes; + +- // Get preferred node with instance's scores +- pe_node_t *allowed = g_hash_table_lookup(instance->allowed_nodes, +- prefer->details->id); ++ const pe_node_t *allowed_node = g_hash_table_lookup(instance->allowed_nodes, ++ current->details->id); + +- if ((allowed == NULL) || (allowed->weight < 0)) { +- pe_rsc_trace(instance, +- "Not assigning %s to preferred node %s: unavailable", +- instance->id, pe__node_name(prefer)); +- return NULL; +- } ++ pe_rsc_trace(instance, "Trying to assign %s to its current node %s", ++ instance->id, pe__node_name(current)); ++ ++ if (!pcmk__node_available(allowed_node, true, false)) { ++ pe_rsc_info(instance, ++ "Not assigning %s to current node %s: unavailable", ++ instance->id, pe__node_name(current)); ++ return false; + } + +- ban_unavailable_allowed_nodes(instance, max_per_node); ++ /* On each iteration, if instance gets assigned to a node other than its ++ * current one, we reserve one instance for the chosen node, unassign ++ * instance, restore instance's original node tables, and try 
again. This ++ * way, instances are proportionally assigned to nodes based on preferences, ++ * but shuffling of specific instances is minimized. If a node will be ++ * assigned instances at all, it preferentially receives instances that are ++ * currently active there. ++ * ++ * parent->allowed_nodes tracks the number of instances assigned to each ++ * node. If a node already has max_per_node instances assigned, ++ * ban_unavailable_allowed_nodes() marks it as unavailable. ++ * ++ * In the end, we restore the original parent->allowed_nodes to undo the ++ * changes to counts during tentative assignments. If we successfully ++ * assigned instance to its current node, we increment that node's counter. ++ */ + +- if (prefer == NULL) { // Final assignment +- chosen = instance->cmds->assign(instance, NULL, true); ++ // Back up the allowed node tables of instance and its children recursively ++ pcmk__copy_node_tables(instance, &allowed_orig); + +- } else { // Possible early assignment to preferred node +- GHashTable *backup = NULL; ++ // Update instances-per-node counts in a scratch table ++ parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes); + +- pcmk__copy_node_tables(instance, &backup); +- chosen = instance->cmds->assign(instance, prefer, false); ++ while (reserved < available) { ++ chosen = assign_instance(instance, current, max_per_node); + +- if (!pe__same_node(chosen, prefer)) { +- // Revert nodes if preferred node won't be assigned +- if (chosen != NULL) { +- pe_rsc_info(instance, +- "Not assigning %s to preferred node %s: " +- "%s is better", +- instance->id, pe__node_name(prefer), +- pe__node_name(chosen)); +- chosen = NULL; +- } +- pcmk__restore_node_tables(instance, backup); +- pcmk__unassign_resource(instance); ++ if (pe__same_node(chosen, current)) { ++ // Successfully assigned to current node ++ break; ++ } ++ ++ // Assignment updates scores, so restore to original state ++ pe_rsc_debug(instance, "Rolling back node scores for %s", 
instance->id); ++ pcmk__restore_node_tables(instance, allowed_orig); ++ ++ if (chosen == NULL) { ++ // Assignment failed, so give up ++ pe_rsc_info(instance, ++ "Not assigning %s to current node %s: unavailable", ++ instance->id, pe__node_name(current)); ++ pe__set_resource_flags(instance, pe_rsc_provisional); ++ break; ++ } ++ ++ // We prefer more strongly to assign an instance to the chosen node ++ pe_rsc_debug(instance, ++ "Not assigning %s to current node %s: %s is better", ++ instance->id, pe__node_name(current), ++ pe__node_name(chosen)); ++ ++ // Reserve one instance for the chosen node and try again ++ if (++reserved >= available) { ++ pe_rsc_info(instance, ++ "Not assigning %s to current node %s: " ++ "other assignments are more important", ++ instance->id, pe__node_name(current)); ++ ++ } else { ++ pe_rsc_debug(instance, ++ "Reserved an instance of %s for %s. Retrying " ++ "assignment of %s to %s", ++ rsc->id, pe__node_name(chosen), instance->id, ++ pe__node_name(current)); + } +- g_hash_table_destroy(backup); ++ ++ // Clear this assignment (frees chosen); leave instance counts in parent ++ pcmk__unassign_resource(instance); ++ chosen = NULL; + } + ++ g_hash_table_destroy(allowed_orig); ++ ++ // Restore original instances-per-node counts ++ g_hash_table_destroy(parent->allowed_nodes); ++ parent->allowed_nodes = allowed_orig_parent; ++ ++ if (chosen == NULL) { ++ // Couldn't assign instance to current node ++ return false; ++ } ++ pe_rsc_trace(instance, "Assigned %s to current node %s", ++ instance->id, pe__node_name(current)); + increment_parent_count(instance, chosen); +- return chosen; ++ return true; + } + + /*! 
+@@ -760,22 +845,18 @@ pcmk__assign_instances(pe_resource_t *collective, GList *instances, + // Assign as many instances as possible to their current location + for (iter = instances; (iter != NULL) && (assigned < max_total); + iter = iter->next) { +- instance = (pe_resource_t *) iter->data; ++ int available = max_total - assigned; + ++ instance = iter->data; + if (!pcmk_is_set(instance->flags, pe_rsc_provisional)) { + continue; // Already assigned + } + + current = preferred_node(collective, instance, optimal_per_node); +- if (current != NULL) { +- const pe_node_t *chosen = assign_instance(instance, current, +- max_per_node); +- +- if (pe__same_node(chosen, current)) { +- pe_rsc_trace(collective, "Assigned %s to current node %s", +- instance->id, pe__node_name(current)); +- assigned++; +- } ++ if ((current != NULL) ++ && assign_instance_early(collective, instance, current, ++ max_per_node, available)) { ++ assigned++; + } + } + + +From 59e9950212506a9034db8e90a17033734a1d18a1 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 10 Jul 2023 02:50:28 -0700 +Subject: [PATCH 19/19] Test: scheduler: Update test outputs after clone + instance shuffling fix + +The following tests are now correct: +* clone-recover-no-shuffle-4 +* clone-recover-no-shuffle-5 +* clone-recover-no-shuffle-6 +* clone-recover-no-shuffle-7 + +Scores for several other tests are changed in ways (usually duplicates +from additional tentative assignments) that don't impact the resulting +transition. + +One test (cancel-behind-moving-remote) technically breaks. Previously, +due to shuffling, ovn-dbs-bundle-1 moved to controller-0. Since +ovndb_servers:1 gets promoted on ovn-dbs-bundle-1, controller-0 held the +promoted instance of ovn-dbs-bundle. + +Now, since instances correctly prefer their current nodes, +ovn-dbs-bundle-1 remains on controller-2. However, ovndb_servers:1 still +gets promoted on ovn-dbs-bundle-1, so controller-2 holds the promoted +instance of ovn-dbs-bundle. 
+ +ip-172.17.1.87 is colocated with ovn-dbs-bundle's promoted role and is +banned from controller-2. As a result, ip-172.17.1.87 is now stopped. + +This test is believed to have worked properly in the past due only to +luck. At this point (see T672 and the bundle-promoted-*colocation-* +tests), it's well-established that colocations involving promotable +bundles don't work correctly. + +Ref T489 +Ref RHBZ#1931023 + +Signed-off-by: Reid Wahl +--- + .../dot/cancel-behind-moving-remote.dot | 99 +-- + .../dot/clone-recover-no-shuffle-4.dot | 23 +- + .../dot/clone-recover-no-shuffle-5.dot | 57 +- + .../dot/clone-recover-no-shuffle-6.dot | 99 +-- + .../dot/clone-recover-no-shuffle-7.dot | 35 +- + .../exp/cancel-behind-moving-remote.exp | 724 +++++------------- + .../exp/clone-recover-no-shuffle-4.exp | 98 +-- + .../exp/clone-recover-no-shuffle-5.exp | 239 +----- + .../exp/clone-recover-no-shuffle-6.exp | 434 ++--------- + .../exp/clone-recover-no-shuffle-7.exp | 174 ++--- + cts/scheduler/scores/bug-cl-5168.scores | 2 +- + .../scores/cancel-behind-moving-remote.scores | 27 +- + .../scores/clone-recover-no-shuffle-10.scores | 2 +- + .../scores/clone-recover-no-shuffle-4.scores | 10 +- + .../scores/clone-recover-no-shuffle-5.scores | 48 +- + .../scores/clone-recover-no-shuffle-6.scores | 22 +- + .../scores/clone-recover-no-shuffle-7.scores | 14 +- + .../scores/promoted-failed-demote-2.scores | 4 - + .../scores/promoted-failed-demote.scores | 4 - + .../scores/utilization-complex.scores | 24 + + .../scores/utilization-order2.scores | 2 + + .../cancel-behind-moving-remote.summary | 61 +- + .../clone-recover-no-shuffle-4.summary | 8 +- + .../clone-recover-no-shuffle-5.summary | 22 +- + .../clone-recover-no-shuffle-6.summary | 48 +- + .../clone-recover-no-shuffle-7.summary | 12 +- + .../xml/cancel-behind-moving-remote.xml | 14 + + .../xml/clone-recover-no-shuffle-4.xml | 5 - + .../xml/clone-recover-no-shuffle-5.xml | 5 - + .../xml/clone-recover-no-shuffle-6.xml | 5 - + 
.../xml/clone-recover-no-shuffle-7.xml | 5 - + 31 files changed, 526 insertions(+), 1800 deletions(-) + +diff --git a/cts/scheduler/dot/cancel-behind-moving-remote.dot b/cts/scheduler/dot/cancel-behind-moving-remote.dot +index 1a0dfc8c889..de803a7e299 100644 +--- a/cts/scheduler/dot/cancel-behind-moving-remote.dot ++++ b/cts/scheduler/dot/cancel-behind-moving-remote.dot +@@ -1,28 +1,12 @@ + digraph "g" { + "Cancel ovndb_servers_monitor_30000 ovn-dbs-bundle-1" -> "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style = bold] + "Cancel ovndb_servers_monitor_30000 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] +-"ip-172.17.1.87_monitor_10000 controller-0" [ style=bold color="green" fontcolor="black"] +-"ip-172.17.1.87_start_0 controller-0" -> "ip-172.17.1.87_monitor_10000 controller-0" [ style = bold] +-"ip-172.17.1.87_start_0 controller-0" [ style=bold color="green" fontcolor="black"] + "nova-evacuate_clear_failcount_0 messaging-0" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-0_clear_failcount_0 controller-0" -> "ovn-dbs-bundle-0_start_0 controller-2" [ style = bold] +-"ovn-dbs-bundle-0_clear_failcount_0 controller-0" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-0_monitor_30000 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-0_start_0 controller-2" -> "ovn-dbs-bundle-0_monitor_30000 controller-2" [ style = bold] +-"ovn-dbs-bundle-0_start_0 controller-2" -> "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style = bold] +-"ovn-dbs-bundle-0_start_0 controller-2" -> "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style = bold] +-"ovn-dbs-bundle-0_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-1_clear_failcount_0 controller-2" -> "ovn-dbs-bundle-1_start_0 controller-0" [ style = bold] +-"ovn-dbs-bundle-1_clear_failcount_0 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-1_monitor_30000 controller-0" [ style=bold color="green" 
fontcolor="black"] +-"ovn-dbs-bundle-1_start_0 controller-0" -> "ovn-dbs-bundle-1_monitor_30000 controller-0" [ style = bold] +-"ovn-dbs-bundle-1_start_0 controller-0" -> "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-1_start_0 controller-0" -> "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-1_start_0 controller-0" -> "ovndb_servers_start_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-1_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-1_stop_0 controller-2" -> "ovn-dbs-bundle-1_start_0 controller-0" [ style = bold] +-"ovn-dbs-bundle-1_stop_0 controller-2" -> "ovn-dbs-bundle-podman-1_stop_0 controller-2" [ style = bold] +-"ovn-dbs-bundle-1_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] ++"ovn-dbs-bundle-0_monitor_30000 controller-0" [ style=bold color="green" fontcolor="black"] ++"ovn-dbs-bundle-0_start_0 controller-0" -> "ovn-dbs-bundle-0_monitor_30000 controller-0" [ style = bold] ++"ovn-dbs-bundle-0_start_0 controller-0" -> "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style = bold] ++"ovn-dbs-bundle-0_start_0 controller-0" -> "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style = bold] ++"ovn-dbs-bundle-0_start_0 controller-0" [ style=bold color="green" fontcolor="black"] + "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" -> "ovn-dbs-bundle_promoted_0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" -> "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" -> "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style = bold] +@@ -32,19 +16,12 @@ + "ovn-dbs-bundle-master_confirmed-post_notify_running_0" -> "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-post_notify_running_0" -> "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style = bold] + 
"ovn-dbs-bundle-master_confirmed-post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" -> "ovn-dbs-bundle-master_pre_notify_promote_0" [ style = bold] +-"ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" -> "ovn-dbs-bundle-master_pre_notify_start_0" [ style = bold] +-"ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" -> "ovn-dbs-bundle_stopped_0" [ style = bold] +-"ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" -> "ovn-dbs-bundle-master_post_notify_promoted_0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" -> "ovn-dbs-bundle-master_promote_0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" -> "ovn-dbs-bundle-master_post_notify_running_0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" -> "ovn-dbs-bundle-master_start_0" [ style = bold] + "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" -> "ovn-dbs-bundle-master_post_notify_stopped_0" [ style = bold] +-"ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" -> "ovn-dbs-bundle-master_stop_0" [ style = bold] +-"ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_post_notify_promoted_0" -> "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] + "ovn-dbs-bundle-master_post_notify_promoted_0" -> "ovndb_servers:0_post_notify_promote_0 ovn-dbs-bundle-0" [ style = bold] + "ovn-dbs-bundle-master_post_notify_promoted_0" -> "ovndb_servers_post_notify_promoted_0 ovn-dbs-bundle-1" [ style = bold] +@@ -55,21 +32,15 @@ + "ovn-dbs-bundle-master_post_notify_running_0" -> 
"ovndb_servers_post_notify_running_0 ovn-dbs-bundle-1" [ style = bold] + "ovn-dbs-bundle-master_post_notify_running_0" -> "ovndb_servers_post_notify_running_0 ovn-dbs-bundle-2" [ style = bold] + "ovn-dbs-bundle-master_post_notify_running_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_post_notify_stopped_0" -> "ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" [ style = bold] +-"ovn-dbs-bundle-master_post_notify_stopped_0" -> "ovndb_servers_post_notify_stopped_0 ovn-dbs-bundle-2" [ style = bold] +-"ovn-dbs-bundle-master_post_notify_stopped_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_pre_notify_promote_0" -> "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_promote_0" -> "ovndb_servers:0_pre_notify_promote_0 ovn-dbs-bundle-0" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_promote_0" -> "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-1" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_promote_0" -> "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-2" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_promote_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_pre_notify_start_0" -> "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" [ style = bold] ++"ovn-dbs-bundle-master_pre_notify_start_0" -> "ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-1" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_start_0" -> "ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-2" [ style = bold] + "ovn-dbs-bundle-master_pre_notify_start_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_pre_notify_stop_0" -> "ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] +-"ovn-dbs-bundle-master_pre_notify_stop_0" -> "ovndb_servers_pre_notify_stop_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-master_pre_notify_stop_0" -> "ovndb_servers_pre_notify_stop_0 ovn-dbs-bundle-2" [ style = bold] 
+-"ovn-dbs-bundle-master_pre_notify_stop_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_promote_0" -> "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style = bold] + "ovn-dbs-bundle-master_promote_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_promoted_0" -> "ovn-dbs-bundle-master_post_notify_promoted_0" [ style = bold] +@@ -79,48 +50,21 @@ + "ovn-dbs-bundle-master_running_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle-master_start_0" -> "ovn-dbs-bundle-master_running_0" [ style = bold] + "ovn-dbs-bundle-master_start_0" -> "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style = bold] +-"ovn-dbs-bundle-master_start_0" -> "ovndb_servers_start_0 ovn-dbs-bundle-1" [ style = bold] + "ovn-dbs-bundle-master_start_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_stop_0" -> "ovn-dbs-bundle-master_stopped_0" [ style = bold] +-"ovn-dbs-bundle-master_stop_0" -> "ovndb_servers_stop_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-master_stop_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-master_stopped_0" -> "ovn-dbs-bundle-master_post_notify_stopped_0" [ style = bold] +-"ovn-dbs-bundle-master_stopped_0" -> "ovn-dbs-bundle-master_promote_0" [ style = bold] +-"ovn-dbs-bundle-master_stopped_0" -> "ovn-dbs-bundle-master_start_0" [ style = bold] +-"ovn-dbs-bundle-master_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle-podman-0_monitor_60000 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-podman-0_start_0 controller-2" -> "ovn-dbs-bundle-0_start_0 controller-2" [ style = bold] +-"ovn-dbs-bundle-podman-0_start_0 controller-2" -> "ovn-dbs-bundle-podman-0_monitor_60000 controller-2" [ style = bold] +-"ovn-dbs-bundle-podman-0_start_0 controller-2" -> "ovn-dbs-bundle_running_0" [ style = bold] +-"ovn-dbs-bundle-podman-0_start_0 controller-2" -> "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style = bold] 
+-"ovn-dbs-bundle-podman-0_start_0 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-podman-1_monitor_60000 controller-0" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" -> "ovn-dbs-bundle-1_start_0 controller-0" [ style = bold] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" -> "ovn-dbs-bundle-podman-1_monitor_60000 controller-0" [ style = bold] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" -> "ovn-dbs-bundle_running_0" [ style = bold] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" -> "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" -> "ovndb_servers_start_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle-podman-1_start_0 controller-0" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle-podman-1_stop_0 controller-2" -> "ovn-dbs-bundle-podman-1_start_0 controller-0" [ style = bold] +-"ovn-dbs-bundle-podman-1_stop_0 controller-2" -> "ovn-dbs-bundle_stopped_0" [ style = bold] +-"ovn-dbs-bundle-podman-1_stop_0 controller-2" [ style=bold color="green" fontcolor="black"] +-"ovn-dbs-bundle_promote_0" -> "ip-172.17.1.87_start_0 controller-0" [ style = bold] ++"ovn-dbs-bundle-podman-0_monitor_60000 controller-0" [ style=bold color="green" fontcolor="black"] ++"ovn-dbs-bundle-podman-0_start_0 controller-0" -> "ovn-dbs-bundle-0_start_0 controller-0" [ style = bold] ++"ovn-dbs-bundle-podman-0_start_0 controller-0" -> "ovn-dbs-bundle-podman-0_monitor_60000 controller-0" [ style = bold] ++"ovn-dbs-bundle-podman-0_start_0 controller-0" -> "ovn-dbs-bundle_running_0" [ style = bold] ++"ovn-dbs-bundle-podman-0_start_0 controller-0" -> "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style = bold] ++"ovn-dbs-bundle-podman-0_start_0 controller-0" [ style=bold color="green" fontcolor="black"] + "ovn-dbs-bundle_promote_0" -> "ovn-dbs-bundle-master_promote_0" [ style = bold] + "ovn-dbs-bundle_promote_0" [ style=bold color="green" 
fontcolor="orange"] + "ovn-dbs-bundle_promoted_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle_running_0" -> "ovn-dbs-bundle_promote_0" [ style = bold] + "ovn-dbs-bundle_running_0" [ style=bold color="green" fontcolor="orange"] + "ovn-dbs-bundle_start_0" -> "ovn-dbs-bundle-master_start_0" [ style = bold] +-"ovn-dbs-bundle_start_0" -> "ovn-dbs-bundle-podman-0_start_0 controller-2" [ style = bold] +-"ovn-dbs-bundle_start_0" -> "ovn-dbs-bundle-podman-1_start_0 controller-0" [ style = bold] ++"ovn-dbs-bundle_start_0" -> "ovn-dbs-bundle-podman-0_start_0 controller-0" [ style = bold] + "ovn-dbs-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle_stop_0" -> "ovn-dbs-bundle-master_stop_0" [ style = bold] +-"ovn-dbs-bundle_stop_0" -> "ovn-dbs-bundle-podman-1_stop_0 controller-2" [ style = bold] +-"ovn-dbs-bundle_stop_0" -> "ovndb_servers_stop_0 ovn-dbs-bundle-1" [ style = bold] +-"ovn-dbs-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +-"ovn-dbs-bundle_stopped_0" -> "ovn-dbs-bundle_promote_0" [ style = bold] +-"ovn-dbs-bundle_stopped_0" -> "ovn-dbs-bundle_start_0" [ style = bold] +-"ovn-dbs-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] + "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style=bold color="green" fontcolor="black"] + "ovndb_servers:0_post_notify_promote_0 ovn-dbs-bundle-0" -> "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] + "ovndb_servers:0_post_notify_promote_0 ovn-dbs-bundle-0" [ style=bold color="green" fontcolor="black"] +@@ -130,7 +74,6 @@ + "ovndb_servers:0_pre_notify_promote_0 ovn-dbs-bundle-0" [ style=bold color="green" fontcolor="black"] + "ovndb_servers:0_start_0 ovn-dbs-bundle-0" -> "ovn-dbs-bundle-master_running_0" [ style = bold] + "ovndb_servers:0_start_0 ovn-dbs-bundle-0" -> "ovndb_servers:0_monitor_30000 ovn-dbs-bundle-0" [ style = bold] +-"ovndb_servers:0_start_0 ovn-dbs-bundle-0" -> "ovndb_servers_start_0 ovn-dbs-bundle-1" [ style = 
bold] + "ovndb_servers:0_start_0 ovn-dbs-bundle-0" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_post_notify_promoted_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_confirmed-post_notify_promoted_0" [ style = bold] +@@ -141,29 +84,17 @@ + "ovndb_servers_post_notify_running_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_post_notify_running_0 ovn-dbs-bundle-2" -> "ovn-dbs-bundle-master_confirmed-post_notify_running_0" [ style = bold] + "ovndb_servers_post_notify_running_0 ovn-dbs-bundle-2" [ style=bold color="green" fontcolor="black"] +-"ovndb_servers_post_notify_stopped_0 ovn-dbs-bundle-2" -> "ovn-dbs-bundle-master_confirmed-post_notify_stopped_0" [ style = bold] +-"ovndb_servers_post_notify_stopped_0 ovn-dbs-bundle-2" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] + "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-2" -> "ovn-dbs-bundle-master_confirmed-pre_notify_promote_0" [ style = bold] + "ovndb_servers_pre_notify_promote_0 ovn-dbs-bundle-2" [ style=bold color="green" fontcolor="black"] ++"ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" [ style = bold] ++"ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-2" -> "ovn-dbs-bundle-master_confirmed-pre_notify_start_0" [ style = bold] + "ovndb_servers_pre_notify_start_0 ovn-dbs-bundle-2" [ style=bold color="green" fontcolor="black"] +-"ovndb_servers_pre_notify_stop_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] +-"ovndb_servers_pre_notify_stop_0 
ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] +-"ovndb_servers_pre_notify_stop_0 ovn-dbs-bundle-2" -> "ovn-dbs-bundle-master_confirmed-pre_notify_stop_0" [ style = bold] +-"ovndb_servers_pre_notify_stop_0 ovn-dbs-bundle-2" [ style=bold color="green" fontcolor="black"] + "ovndb_servers_promote_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_promoted_0" [ style = bold] + "ovndb_servers_promote_0 ovn-dbs-bundle-1" -> "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style = bold] + "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] +-"ovndb_servers_start_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_running_0" [ style = bold] +-"ovndb_servers_start_0 ovn-dbs-bundle-1" -> "ovndb_servers_monitor_10000 ovn-dbs-bundle-1" [ style = bold] +-"ovndb_servers_start_0 ovn-dbs-bundle-1" -> "ovndb_servers_promote_0 ovn-dbs-bundle-1" [ style = bold] +-"ovndb_servers_start_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] +-"ovndb_servers_stop_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-1_stop_0 controller-2" [ style = bold] +-"ovndb_servers_stop_0 ovn-dbs-bundle-1" -> "ovn-dbs-bundle-master_stopped_0" [ style = bold] +-"ovndb_servers_stop_0 ovn-dbs-bundle-1" -> "ovndb_servers_start_0 ovn-dbs-bundle-1" [ style = bold] +-"ovndb_servers_stop_0 ovn-dbs-bundle-1" [ style=bold color="green" fontcolor="black"] + "rabbitmq-bundle-1_monitor_30000 controller-0" [ style=dashed color="red" fontcolor="black"] + "rabbitmq-bundle-1_start_0 controller-0" -> "rabbitmq-bundle-1_monitor_30000 controller-0" [ style = dashed] + "rabbitmq-bundle-1_start_0 controller-0" -> "rabbitmq:1_monitor_10000 rabbitmq-bundle-1" [ style = dashed] +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-4.dot b/cts/scheduler/dot/clone-recover-no-shuffle-4.dot +index fd002f28fcf..287d82d3806 100644 +--- a/cts/scheduler/dot/clone-recover-no-shuffle-4.dot ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-4.dot +@@ -1,23 +1,10 @@ + digraph "g" { + 
"dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] +-"dummy-clone_start_0" -> "dummy:2_start_0 node2" [ style = bold] +-"dummy-clone_start_0" -> "dummy_start_0 node1" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node1" [ style = bold] + "dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] +-"dummy-clone_stop_0" -> "dummy-clone_stopped_0" [ style = bold] +-"dummy-clone_stop_0" -> "dummy_stop_0 node2" [ style = bold] +-"dummy-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +-"dummy-clone_stopped_0" -> "dummy-clone_start_0" [ style = bold] +-"dummy-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"dummy:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] +-"dummy:2_start_0 node2" -> "dummy-clone_running_0" [ style = bold] +-"dummy:2_start_0 node2" -> "dummy:2_monitor_10000 node2" [ style = bold] +-"dummy:2_start_0 node2" [ style=bold color="green" fontcolor="black"] +-"dummy_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] +-"dummy_start_0 node1" -> "dummy-clone_running_0" [ style = bold] +-"dummy_start_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] +-"dummy_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"dummy_stop_0 node2" -> "dummy-clone_stopped_0" [ style = bold] +-"dummy_stop_0 node2" -> "dummy_start_0 node1" [ style = bold] +-"dummy_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"dummy:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy:2_start_0 node1" -> "dummy:2_monitor_10000 node1" [ style = bold] ++"dummy:2_start_0 node1" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-5.dot b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +index a2356f2280b..d3bdf04baa9 100644 +--- 
a/cts/scheduler/dot/clone-recover-no-shuffle-5.dot ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-5.dot +@@ -1,56 +1,21 @@ + digraph "g" { + "grp-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "grp-clone_start_0" -> "grp-clone_running_0" [ style = bold] +-"grp-clone_start_0" -> "grp:0_start_0" [ style = bold] + "grp-clone_start_0" -> "grp:2_start_0" [ style = bold] + "grp-clone_start_0" [ style=bold color="green" fontcolor="orange"] +-"grp-clone_stop_0" -> "grp-clone_stopped_0" [ style = bold] +-"grp-clone_stop_0" -> "grp:0_stop_0" [ style = bold] +-"grp-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +-"grp-clone_stopped_0" -> "grp-clone_start_0" [ style = bold] +-"grp-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"grp:0_running_0" -> "grp-clone_running_0" [ style = bold] +-"grp:0_running_0" [ style=bold color="green" fontcolor="orange"] +-"grp:0_start_0" -> "grp:0_running_0" [ style = bold] +-"grp:0_start_0" -> "rsc1_start_0 node1" [ style = bold] +-"grp:0_start_0" -> "rsc2_start_0 node1" [ style = bold] +-"grp:0_start_0" [ style=bold color="green" fontcolor="orange"] +-"grp:0_stop_0" -> "grp:0_stopped_0" [ style = bold] +-"grp:0_stop_0" -> "rsc1_stop_0 node2" [ style = bold] +-"grp:0_stop_0" -> "rsc2_stop_0 node2" [ style = bold] +-"grp:0_stop_0" [ style=bold color="green" fontcolor="orange"] +-"grp:0_stopped_0" -> "grp-clone_stopped_0" [ style = bold] +-"grp:0_stopped_0" -> "grp:0_start_0" [ style = bold] +-"grp:0_stopped_0" [ style=bold color="green" fontcolor="orange"] + "grp:2_running_0" -> "grp-clone_running_0" [ style = bold] + "grp:2_running_0" [ style=bold color="green" fontcolor="orange"] + "grp:2_start_0" -> "grp:2_running_0" [ style = bold] +-"grp:2_start_0" -> "rsc1:2_start_0 node2" [ style = bold] +-"grp:2_start_0" -> "rsc2:2_start_0 node2" [ style = bold] ++"grp:2_start_0" -> "rsc1:2_start_0 node1" [ style = bold] ++"grp:2_start_0" -> "rsc2:2_start_0 node1" [ style = bold] + "grp:2_start_0" [ 
style=bold color="green" fontcolor="orange"] +-"rsc1:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] +-"rsc1:2_start_0 node2" -> "grp:2_running_0" [ style = bold] +-"rsc1:2_start_0 node2" -> "rsc1:2_monitor_10000 node2" [ style = bold] +-"rsc1:2_start_0 node2" -> "rsc2:2_start_0 node2" [ style = bold] +-"rsc1:2_start_0 node2" [ style=bold color="green" fontcolor="black"] +-"rsc1_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] +-"rsc1_start_0 node1" -> "grp:0_running_0" [ style = bold] +-"rsc1_start_0 node1" -> "rsc1_monitor_10000 node1" [ style = bold] +-"rsc1_start_0 node1" -> "rsc2_start_0 node1" [ style = bold] +-"rsc1_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"rsc1_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] +-"rsc1_stop_0 node2" -> "rsc1_start_0 node1" [ style = bold] +-"rsc1_stop_0 node2" [ style=bold color="green" fontcolor="black"] +-"rsc2:2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"] +-"rsc2:2_start_0 node2" -> "grp:2_running_0" [ style = bold] +-"rsc2:2_start_0 node2" -> "rsc2:2_monitor_10000 node2" [ style = bold] +-"rsc2:2_start_0 node2" [ style=bold color="green" fontcolor="black"] +-"rsc2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] +-"rsc2_start_0 node1" -> "grp:0_running_0" [ style = bold] +-"rsc2_start_0 node1" -> "rsc2_monitor_10000 node1" [ style = bold] +-"rsc2_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"rsc2_stop_0 node2" -> "grp:0_stopped_0" [ style = bold] +-"rsc2_stop_0 node2" -> "rsc1_stop_0 node2" [ style = bold] +-"rsc2_stop_0 node2" -> "rsc2_start_0 node1" [ style = bold] +-"rsc2_stop_0 node2" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc1:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc1:2_monitor_10000 node1" [ style = bold] ++"rsc1:2_start_0 node1" -> "rsc2:2_start_0 node1" [ style = 
bold] ++"rsc1:2_start_0 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"rsc2:2_start_0 node1" -> "grp:2_running_0" [ style = bold] ++"rsc2:2_start_0 node1" -> "rsc2:2_monitor_10000 node1" [ style = bold] ++"rsc2:2_start_0 node1" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-6.dot b/cts/scheduler/dot/clone-recover-no-shuffle-6.dot +index f8cfe9252d2..f60fd2cc04e 100644 +--- a/cts/scheduler/dot/clone-recover-no-shuffle-6.dot ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-6.dot +@@ -1,97 +1,32 @@ + digraph "g" { +-"base-bundle-0_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] +-"base-bundle-0_start_0 node1" -> "base-bundle-0_monitor_30000 node1" [ style = bold] +-"base-bundle-0_start_0 node1" -> "base_start_0 base-bundle-0" [ style = bold] +-"base-bundle-0_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"base-bundle-0_stop_0 node3" -> "base-bundle-0_start_0 node1" [ style = bold] +-"base-bundle-0_stop_0 node3" -> "base-bundle-podman-0_stop_0 node3" [ style = bold] +-"base-bundle-0_stop_0 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-1_monitor_30000 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-1_start_0 node3" -> "base-bundle-1_monitor_30000 node3" [ style = bold] +-"base-bundle-1_start_0 node3" -> "base_start_0 base-bundle-1" [ style = bold] +-"base-bundle-1_start_0 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-1_stop_0 node2" -> "base-bundle-1_start_0 node3" [ style = bold] +-"base-bundle-1_stop_0 node2" -> "base-bundle-podman-1_stop_0 node2" [ style = bold] +-"base-bundle-1_stop_0 node2" [ style=bold color="green" fontcolor="black"] +-"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] + 
"base-bundle-2_monitor_0 node1" [ style=bold color="green" fontcolor="black"] +-"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node2" -> "base-bundle-2_start_0 node1" [ style = bold] + "base-bundle-2_monitor_0 node2" [ style=bold color="green" fontcolor="black"] +-"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node2" [ style = bold] ++"base-bundle-2_monitor_0 node3" -> "base-bundle-2_start_0 node1" [ style = bold] + "base-bundle-2_monitor_0 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-2_monitor_30000 node2" [ style=bold color="green" fontcolor="black"] +-"base-bundle-2_start_0 node2" -> "base-bundle-2_monitor_30000 node2" [ style = bold] +-"base-bundle-2_start_0 node2" -> "base:2_start_0 base-bundle-2" [ style = bold] +-"base-bundle-2_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_monitor_30000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-2_start_0 node1" -> "base-bundle-2_monitor_30000 node1" [ style = bold] ++"base-bundle-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-2_start_0 node1" [ style=bold color="green" fontcolor="black"] + "base-bundle-clone_running_0" -> "base-bundle_running_0" [ style = bold] + "base-bundle-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "base-bundle-clone_start_0" -> "base-bundle-clone_running_0" [ style = bold] + "base-bundle-clone_start_0" -> "base:2_start_0 base-bundle-2" [ style = bold] +-"base-bundle-clone_start_0" -> "base_start_0 base-bundle-0" [ style = bold] +-"base-bundle-clone_start_0" -> "base_start_0 base-bundle-1" [ style = bold] + "base-bundle-clone_start_0" [ style=bold color="green" fontcolor="orange"] +-"base-bundle-clone_stop_0" -> "base-bundle-clone_stopped_0" [ style = bold] +-"base-bundle-clone_stop_0" -> "base_stop_0 base-bundle-0" [ style = bold] +-"base-bundle-clone_stop_0" -> "base_stop_0 base-bundle-1" [ style 
= bold] +-"base-bundle-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +-"base-bundle-clone_stopped_0" -> "base-bundle-clone_start_0" [ style = bold] +-"base-bundle-clone_stopped_0" -> "base-bundle_stopped_0" [ style = bold] +-"base-bundle-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"base-bundle-podman-0_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-0_start_0 node1" -> "base-bundle-0_start_0 node1" [ style = bold] +-"base-bundle-podman-0_start_0 node1" -> "base-bundle-podman-0_monitor_60000 node1" [ style = bold] +-"base-bundle-podman-0_start_0 node1" -> "base-bundle_running_0" [ style = bold] +-"base-bundle-podman-0_start_0 node1" -> "base_start_0 base-bundle-0" [ style = bold] +-"base-bundle-podman-0_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-0_stop_0 node3" -> "base-bundle-podman-0_start_0 node1" [ style = bold] +-"base-bundle-podman-0_stop_0 node3" -> "base-bundle_stopped_0" [ style = bold] +-"base-bundle-podman-0_stop_0 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-1_monitor_60000 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-1_start_0 node3" -> "base-bundle-1_start_0 node3" [ style = bold] +-"base-bundle-podman-1_start_0 node3" -> "base-bundle-podman-1_monitor_60000 node3" [ style = bold] +-"base-bundle-podman-1_start_0 node3" -> "base-bundle_running_0" [ style = bold] +-"base-bundle-podman-1_start_0 node3" -> "base_start_0 base-bundle-1" [ style = bold] +-"base-bundle-podman-1_start_0 node3" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-1_stop_0 node2" -> "base-bundle-podman-1_start_0 node3" [ style = bold] +-"base-bundle-podman-1_stop_0 node2" -> "base-bundle_stopped_0" [ style = bold] +-"base-bundle-podman-1_stop_0 node2" [ style=bold color="green" fontcolor="black"] +-"base-bundle-podman-2_monitor_60000 node2" [ style=bold color="green" fontcolor="black"] 
+-"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node1" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node2" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_monitor_0 node3" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base-bundle-2_start_0 node2" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base-bundle-podman-2_monitor_60000 node2" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base-bundle_running_0" [ style = bold] +-"base-bundle-podman-2_start_0 node2" -> "base:2_start_0 base-bundle-2" [ style = bold] +-"base-bundle-podman-2_start_0 node2" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_monitor_60000 node1" [ style=bold color="green" fontcolor="black"] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_monitor_0 node3" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-2_start_0 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle-podman-2_monitor_60000 node1" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base-bundle_running_0" [ style = bold] ++"base-bundle-podman-2_start_0 node1" -> "base:2_start_0 base-bundle-2" [ style = bold] ++"base-bundle-podman-2_start_0 node1" [ style=bold color="green" fontcolor="black"] + "base-bundle_running_0" [ style=bold color="green" fontcolor="orange"] + "base-bundle_start_0" -> "base-bundle-clone_start_0" [ style = bold] +-"base-bundle_start_0" -> "base-bundle-podman-0_start_0 node1" [ style = bold] +-"base-bundle_start_0" -> "base-bundle-podman-1_start_0 node3" [ style = bold] +-"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node2" [ style = bold] ++"base-bundle_start_0" -> "base-bundle-podman-2_start_0 node1" [ style = bold] + 
"base-bundle_start_0" [ style=bold color="green" fontcolor="orange"] +-"base-bundle_stop_0" -> "base-bundle-clone_stop_0" [ style = bold] +-"base-bundle_stop_0" -> "base-bundle-podman-0_stop_0 node3" [ style = bold] +-"base-bundle_stop_0" -> "base-bundle-podman-1_stop_0 node2" [ style = bold] +-"base-bundle_stop_0" -> "base_stop_0 base-bundle-0" [ style = bold] +-"base-bundle_stop_0" -> "base_stop_0 base-bundle-1" [ style = bold] +-"base-bundle_stop_0" [ style=bold color="green" fontcolor="orange"] +-"base-bundle_stopped_0" [ style=bold color="green" fontcolor="orange"] + "base:2_start_0 base-bundle-2" -> "base-bundle-clone_running_0" [ style = bold] + "base:2_start_0 base-bundle-2" [ style=bold color="green" fontcolor="black"] +-"base_start_0 base-bundle-0" -> "base-bundle-clone_running_0" [ style = bold] +-"base_start_0 base-bundle-0" -> "base_start_0 base-bundle-1" [ style = bold] +-"base_start_0 base-bundle-0" [ style=bold color="green" fontcolor="black"] +-"base_start_0 base-bundle-1" -> "base-bundle-clone_running_0" [ style = bold] +-"base_start_0 base-bundle-1" -> "base:2_start_0 base-bundle-2" [ style = bold] +-"base_start_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] +-"base_stop_0 base-bundle-0" -> "base-bundle-0_stop_0 node3" [ style = bold] +-"base_stop_0 base-bundle-0" -> "base-bundle-clone_stopped_0" [ style = bold] +-"base_stop_0 base-bundle-0" -> "base_start_0 base-bundle-0" [ style = bold] +-"base_stop_0 base-bundle-0" [ style=bold color="green" fontcolor="black"] +-"base_stop_0 base-bundle-1" -> "base-bundle-1_stop_0 node2" [ style = bold] +-"base_stop_0 base-bundle-1" -> "base-bundle-clone_stopped_0" [ style = bold] +-"base_stop_0 base-bundle-1" -> "base_start_0 base-bundle-1" [ style = bold] +-"base_stop_0 base-bundle-1" -> "base_stop_0 base-bundle-0" [ style = bold] +-"base_stop_0 base-bundle-1" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/dot/clone-recover-no-shuffle-7.dot 
b/cts/scheduler/dot/clone-recover-no-shuffle-7.dot +index 8bff7da01db..f61bf0d7acf 100644 +--- a/cts/scheduler/dot/clone-recover-no-shuffle-7.dot ++++ b/cts/scheduler/dot/clone-recover-no-shuffle-7.dot +@@ -6,40 +6,25 @@ + "dummy-clone_demote_0" [ style=bold color="green" fontcolor="orange"] + "dummy-clone_demoted_0" -> "dummy-clone_promote_0" [ style = bold] + "dummy-clone_demoted_0" -> "dummy-clone_start_0" [ style = bold] +-"dummy-clone_demoted_0" -> "dummy-clone_stop_0" [ style = bold] + "dummy-clone_demoted_0" [ style=bold color="green" fontcolor="orange"] +-"dummy-clone_promote_0" -> "dummy_promote_0 node1" [ style = bold] ++"dummy-clone_promote_0" -> "dummy:2_promote_0 node1" [ style = bold] + "dummy-clone_promote_0" [ style=bold color="green" fontcolor="orange"] + "dummy-clone_promoted_0" [ style=bold color="green" fontcolor="orange"] + "dummy-clone_running_0" -> "dummy-clone_promote_0" [ style = bold] + "dummy-clone_running_0" [ style=bold color="green" fontcolor="orange"] + "dummy-clone_start_0" -> "dummy-clone_running_0" [ style = bold] +-"dummy-clone_start_0" -> "dummy:2_start_0 node3" [ style = bold] +-"dummy-clone_start_0" -> "dummy_start_0 node1" [ style = bold] ++"dummy-clone_start_0" -> "dummy:2_start_0 node1" [ style = bold] + "dummy-clone_start_0" [ style=bold color="green" fontcolor="orange"] +-"dummy-clone_stop_0" -> "dummy-clone_stopped_0" [ style = bold] +-"dummy-clone_stop_0" -> "dummy_stop_0 node3" [ style = bold] +-"dummy-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +-"dummy-clone_stopped_0" -> "dummy-clone_promote_0" [ style = bold] +-"dummy-clone_stopped_0" -> "dummy-clone_start_0" [ style = bold] +-"dummy-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +-"dummy:2_monitor_11000 node3" [ style=bold color="green" fontcolor="black"] +-"dummy:2_start_0 node3" -> "dummy-clone_running_0" [ style = bold] +-"dummy:2_start_0 node3" -> "dummy:2_monitor_11000 node3" [ style = bold] +-"dummy:2_start_0 node3" [ 
style=bold color="green" fontcolor="black"] ++"dummy:2_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] ++"dummy:2_promote_0 node1" -> "dummy-clone_promoted_0" [ style = bold] ++"dummy:2_promote_0 node1" -> "dummy:2_monitor_10000 node1" [ style = bold] ++"dummy:2_promote_0 node1" [ style=bold color="green" fontcolor="black"] ++"dummy:2_start_0 node1" -> "dummy-clone_running_0" [ style = bold] ++"dummy:2_start_0 node1" -> "dummy:2_monitor_10000 node1" [ style = bold] ++"dummy:2_start_0 node1" -> "dummy:2_promote_0 node1" [ style = bold] ++"dummy:2_start_0 node1" [ style=bold color="green" fontcolor="black"] + "dummy_demote_0 node2" -> "dummy-clone_demoted_0" [ style = bold] + "dummy_demote_0 node2" -> "dummy_monitor_11000 node2" [ style = bold] + "dummy_demote_0 node2" [ style=bold color="green" fontcolor="black"] +-"dummy_monitor_10000 node1" [ style=bold color="green" fontcolor="black"] + "dummy_monitor_11000 node2" [ style=bold color="green" fontcolor="black"] +-"dummy_promote_0 node1" -> "dummy-clone_promoted_0" [ style = bold] +-"dummy_promote_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] +-"dummy_promote_0 node1" [ style=bold color="green" fontcolor="black"] +-"dummy_start_0 node1" -> "dummy-clone_running_0" [ style = bold] +-"dummy_start_0 node1" -> "dummy_monitor_10000 node1" [ style = bold] +-"dummy_start_0 node1" -> "dummy_promote_0 node1" [ style = bold] +-"dummy_start_0 node1" [ style=bold color="green" fontcolor="black"] +-"dummy_stop_0 node3" -> "dummy-clone_stopped_0" [ style = bold] +-"dummy_stop_0 node3" -> "dummy_start_0 node1" [ style = bold] +-"dummy_stop_0 node3" [ style=bold color="green" fontcolor="black"] + } +diff --git a/cts/scheduler/exp/cancel-behind-moving-remote.exp b/cts/scheduler/exp/cancel-behind-moving-remote.exp +index 17759cb8c57..68cdf4d5370 100644 +--- a/cts/scheduler/exp/cancel-behind-moving-remote.exp ++++ b/cts/scheduler/exp/cancel-behind-moving-remote.exp +@@ -1,46 +1,46 @@ + + + +- ++ + + + + + 
+- ++ + + + + + +- ++ + + + + + +- ++ + + +- ++ + + + + + +- ++ + + + + + +- ++ + + + + + +- ++ + + + +@@ -48,193 +48,187 @@ + + + +- ++ + + + + + +- ++ + + + + + +- ++ + + + + + +- ++ + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + + + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + +- +- +- +- +- +- +- ++ + + + + + +- ++ + +- ++ + + + +@@ -242,61 +236,11 @@ + + + +- +- +- +- +- +- +- +- +- +- ++ + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + + + +@@ -305,567 +249,302 @@ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + +- +- +- +- +- +- +- +- +- + +- ++ + + + +- ++ + +- +- ++ ++ + + +- +- +- +- +- +- +- +- ++ + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- 
+- +- +- ++ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + + +- +- +- +- +- +- +- ++ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + +@@ -874,7 +553,7 @@ + + + +- ++ + + + +@@ -883,7 +562,7 @@ + + + +- ++ + + + +@@ -892,7 +571,7 @@ + + + +- ++ + + + +@@ -901,42 +580,42 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +@@ -945,7 +624,7 @@ + + + +- ++ + + + +@@ -954,7 +633,7 @@ + + + +- ++ + + + +@@ -963,42 +642,42 @@ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +- ++ + +- ++ + + + + + + +- ++ + + + +@@ -1007,7 +686,7 @@ + + + +- ++ + + + +@@ -1016,7 +695,7 @@ + + + +- ++ + + + +@@ -1025,7 +704,7 @@ + + + +- ++ + + + +@@ -1034,101 +713,68 @@ + + + +- ++ + +- ++ + + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + + +- ++ + + + +- ++ + +- ++ + + + +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-4.exp b/cts/scheduler/exp/clone-recover-no-shuffle-4.exp +index 4596c685d0a..670a823dac9 100644 +--- a/cts/scheduler/exp/clone-recover-no-shuffle-4.exp ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-4.exp +@@ -1,123 +1,51 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- 
+- +- +- +- +- +- +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-5.exp b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +index c1cee43b12f..84b1e1bc98c 100644 +--- a/cts/scheduler/exp/clone-recover-no-shuffle-5.exp ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-5.exp +@@ -1,293 +1,110 @@ + + + +- ++ + + + + + +- ++ + + +- ++ + + +- ++ + + + + + +- ++ + + + + + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- + +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-6.exp b/cts/scheduler/exp/clone-recover-no-shuffle-6.exp +index e6704c9e254..6b6ed075f57 100644 +--- a/cts/scheduler/exp/clone-recover-no-shuffle-6.exp ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-6.exp +@@ -1,504 +1,168 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + +- +- +- +- +- 
+- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + +- ++ + + +- ++ + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + +- ++ + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + +- +- +- +- +- +- +- +- +- ++ + +- ++ + + + + + +- +- +- +- ++ + + +- +- +- +- ++ + + + +- ++ + +- ++ + + + +diff --git a/cts/scheduler/exp/clone-recover-no-shuffle-7.exp b/cts/scheduler/exp/clone-recover-no-shuffle-7.exp +index 950de9e0312..870ed54e9c2 100644 +--- a/cts/scheduler/exp/clone-recover-no-shuffle-7.exp ++++ b/cts/scheduler/exp/clone-recover-no-shuffle-7.exp +@@ -1,239 +1,161 @@ + + + +- +- +- ++ ++ ++ + + + + +- +- +- +- ++ + + + + + +- +- +- ++ ++ ++ + + + + +- ++ + + +- ++ + + + + + +- +- +- ++ ++ ++ + + +- +- +- +- +- +- +- +- ++ + + + +- +- +- ++ ++ ++ + + + + +- ++ + +- +- +- +- +- +- +- +- +- +- + +- ++ + + + +- ++ + +- +- +- ++ ++ ++ + + + + +- ++ + + +- ++ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + +- ++ + +- ++ + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- 
++ + + + + + +- ++ + +- ++ + + + + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- ++ + + +- ++ + + + +- ++ + +- ++ + + + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ + + + +diff --git a/cts/scheduler/scores/bug-cl-5168.scores b/cts/scheduler/scores/bug-cl-5168.scores +index 916fecb195f..59dee5d39b3 100644 +--- a/cts/scheduler/scores/bug-cl-5168.scores ++++ b/cts/scheduler/scores/bug-cl-5168.scores +@@ -200,7 +200,7 @@ pcmk__primitive_assign: drbd-r1:0 allocation score on hex-2: 1001 + pcmk__primitive_assign: drbd-r1:0 allocation score on hex-3: -INFINITY + pcmk__primitive_assign: drbd-r1:0 allocation score on hex-3: INFINITY + pcmk__primitive_assign: drbd-r1:1 allocation score on hex-1: -INFINITY +-pcmk__primitive_assign: drbd-r1:1 allocation score on hex-2: 0 ++pcmk__primitive_assign: drbd-r1:1 allocation score on hex-2: -INFINITY + pcmk__primitive_assign: drbd-r1:1 allocation score on hex-3: INFINITY + pcmk__primitive_assign: dummy1 allocation score on hex-1: -INFINITY + pcmk__primitive_assign: dummy1 allocation score on hex-2: -INFINITY +diff --git a/cts/scheduler/scores/cancel-behind-moving-remote.scores b/cts/scheduler/scores/cancel-behind-moving-remote.scores +index 0e11b225aea..09f0175b9e2 100644 +--- a/cts/scheduler/scores/cancel-behind-moving-remote.scores ++++ b/cts/scheduler/scores/cancel-behind-moving-remote.scores +@@ -1799,7 +1799,7 @@ pcmk__primitive_assign: ip-172.17.1.151 allocation score on messaging-1: -INFINI + pcmk__primitive_assign: ip-172.17.1.151 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ip-172.17.1.87 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ip-172.17.1.87 allocation score on compute-1: -INFINITY +-pcmk__primitive_assign: ip-172.17.1.87 allocation score on controller-0: 0 ++pcmk__primitive_assign: ip-172.17.1.87 allocation score on controller-0: -INFINITY + 
pcmk__primitive_assign: ip-172.17.1.87 allocation score on controller-1: -INFINITY + pcmk__primitive_assign: ip-172.17.1.87 allocation score on controller-2: -INFINITY + pcmk__primitive_assign: ip-172.17.1.87 allocation score on database-0: -INFINITY +@@ -1865,9 +1865,9 @@ pcmk__primitive_assign: openstack-cinder-volume-podman-0 allocation score on mes + pcmk__primitive_assign: openstack-cinder-volume-podman-0 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on compute-1: -INFINITY +-pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on controller-0: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on controller-0: 10000 + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on controller-1: 0 +-pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on controller-2: 10000 ++pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on controller-2: 0 + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on database-0: 0 + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on database-1: 0 + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on database-2: 0 +@@ -1876,9 +1876,9 @@ pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on messaging-1: 0 + pcmk__primitive_assign: ovn-dbs-bundle-0 allocation score on messaging-2: 0 + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on compute-1: -INFINITY +-pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on controller-0: 10000 ++pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on controller-0: 0 + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on controller-1: 0 +-pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on controller-2: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on 
controller-2: 10000 + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on database-0: 0 + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on database-1: 0 + pcmk__primitive_assign: ovn-dbs-bundle-1 allocation score on database-2: 0 +@@ -1898,9 +1898,9 @@ pcmk__primitive_assign: ovn-dbs-bundle-2 allocation score on messaging-1: 0 + pcmk__primitive_assign: ovn-dbs-bundle-2 allocation score on messaging-2: 0 + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on compute-1: -INFINITY +-pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on controller-0: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on controller-0: 0 + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on controller-1: -INFINITY +-pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on controller-2: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on controller-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on database-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on database-1: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on database-2: -INFINITY +@@ -1909,24 +1909,35 @@ pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on messaging-1: + pcmk__primitive_assign: ovn-dbs-bundle-podman-0 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-0: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-1: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-1: -INFINITY 
+-pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-0: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on compute-1: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-0: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-0: 0 + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-1: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-1: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-1: 0 + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-2: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on controller-2: 0 ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-1: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-1: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-1: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on database-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-0: 
-INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-0: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-1: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-1: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-1: -INFINITY ++pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-1 allocation score on messaging-2: -INFINITY + pcmk__primitive_assign: ovn-dbs-bundle-podman-2 allocation score on compute-0: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-10.scores b/cts/scheduler/scores/clone-recover-no-shuffle-10.scores +index 4ac63e37058..4f4c29ed7f1 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-10.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-10.scores +@@ -28,4 +28,4 @@ pcmk__primitive_assign: dummy:1 allocation score on node2: 16 + pcmk__primitive_assign: dummy:1 allocation score on node3: 0 + pcmk__primitive_assign: dummy:2 allocation score on node1: 10 + pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY +-pcmk__primitive_assign: dummy:2 allocation score on node3: 5 ++pcmk__primitive_assign: dummy:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-4.scores b/cts/scheduler/scores/clone-recover-no-shuffle-4.scores +index 492dad1baa4..2a52c8185b2 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-4.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-4.scores +@@ -14,7 +14,7 @@ pcmk__clone_assign: dummy:2 allocation score on node3: 0 + pcmk__primitive_assign: Fencing allocation score on node1: 0 + pcmk__primitive_assign: Fencing allocation score on node2: 0 + pcmk__primitive_assign: Fencing allocation score 
on node3: 0 +-pcmk__primitive_assign: dummy:0 allocation score on node1: 100 ++pcmk__primitive_assign: dummy:0 allocation score on node1: -INFINITY + pcmk__primitive_assign: dummy:0 allocation score on node1: 100 + pcmk__primitive_assign: dummy:0 allocation score on node2: 1 + pcmk__primitive_assign: dummy:0 allocation score on node2: 1 +@@ -22,10 +22,10 @@ pcmk__primitive_assign: dummy:0 allocation score on node3: 0 + pcmk__primitive_assign: dummy:0 allocation score on node3: 0 + pcmk__primitive_assign: dummy:1 allocation score on node1: -INFINITY + pcmk__primitive_assign: dummy:1 allocation score on node1: 100 +-pcmk__primitive_assign: dummy:1 allocation score on node2: 0 +-pcmk__primitive_assign: dummy:1 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: dummy:1 allocation score on node3: 1 + pcmk__primitive_assign: dummy:1 allocation score on node3: 1 +-pcmk__primitive_assign: dummy:2 allocation score on node1: -INFINITY +-pcmk__primitive_assign: dummy:2 allocation score on node2: 0 ++pcmk__primitive_assign: dummy:2 allocation score on node1: 100 ++pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY + pcmk__primitive_assign: dummy:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-5.scores b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +index 0dd9728830c..c6c8072db82 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-5.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-5.scores +@@ -29,7 +29,7 @@ pcmk__clone_assign: rsc2:1 allocation score on node3: 1 + pcmk__clone_assign: rsc2:2 allocation score on node1: 0 + pcmk__clone_assign: rsc2:2 allocation score on node2: 0 + pcmk__clone_assign: rsc2:2 allocation score on node3: 0 +-pcmk__group_assign: grp:0 allocation score on node1: 100 ++pcmk__group_assign: grp:0 allocation score on 
node1: -INFINITY + pcmk__group_assign: grp:0 allocation score on node1: 100 + pcmk__group_assign: grp:0 allocation score on node2: 0 + pcmk__group_assign: grp:0 allocation score on node2: 0 +@@ -37,14 +37,14 @@ pcmk__group_assign: grp:0 allocation score on node3: 0 + pcmk__group_assign: grp:0 allocation score on node3: 0 + pcmk__group_assign: grp:1 allocation score on node1: -INFINITY + pcmk__group_assign: grp:1 allocation score on node1: 100 +-pcmk__group_assign: grp:1 allocation score on node2: 0 +-pcmk__group_assign: grp:1 allocation score on node2: 0 ++pcmk__group_assign: grp:1 allocation score on node2: -INFINITY ++pcmk__group_assign: grp:1 allocation score on node2: -INFINITY + pcmk__group_assign: grp:1 allocation score on node3: 0 + pcmk__group_assign: grp:1 allocation score on node3: 0 +-pcmk__group_assign: grp:2 allocation score on node1: -INFINITY +-pcmk__group_assign: grp:2 allocation score on node2: 0 ++pcmk__group_assign: grp:2 allocation score on node1: 100 ++pcmk__group_assign: grp:2 allocation score on node2: -INFINITY + pcmk__group_assign: grp:2 allocation score on node3: -INFINITY +-pcmk__group_assign: rsc1:0 allocation score on node1: 100 ++pcmk__group_assign: rsc1:0 allocation score on node1: -INFINITY + pcmk__group_assign: rsc1:0 allocation score on node1: 100 + pcmk__group_assign: rsc1:0 allocation score on node2: 1 + pcmk__group_assign: rsc1:0 allocation score on node2: 1 +@@ -52,14 +52,14 @@ pcmk__group_assign: rsc1:0 allocation score on node3: 0 + pcmk__group_assign: rsc1:0 allocation score on node3: 0 + pcmk__group_assign: rsc1:1 allocation score on node1: -INFINITY + pcmk__group_assign: rsc1:1 allocation score on node1: 100 +-pcmk__group_assign: rsc1:1 allocation score on node2: 0 +-pcmk__group_assign: rsc1:1 allocation score on node2: 0 ++pcmk__group_assign: rsc1:1 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc1:1 allocation score on node2: -INFINITY + pcmk__group_assign: rsc1:1 allocation score on node3: 1 + 
pcmk__group_assign: rsc1:1 allocation score on node3: 1 +-pcmk__group_assign: rsc1:2 allocation score on node1: -INFINITY +-pcmk__group_assign: rsc1:2 allocation score on node2: 0 ++pcmk__group_assign: rsc1:2 allocation score on node1: 100 ++pcmk__group_assign: rsc1:2 allocation score on node2: -INFINITY + pcmk__group_assign: rsc1:2 allocation score on node3: -INFINITY +-pcmk__group_assign: rsc2:0 allocation score on node1: 0 ++pcmk__group_assign: rsc2:0 allocation score on node1: -INFINITY + pcmk__group_assign: rsc2:0 allocation score on node1: 0 + pcmk__group_assign: rsc2:0 allocation score on node2: 1 + pcmk__group_assign: rsc2:0 allocation score on node2: 1 +@@ -67,17 +67,17 @@ pcmk__group_assign: rsc2:0 allocation score on node3: 0 + pcmk__group_assign: rsc2:0 allocation score on node3: 0 + pcmk__group_assign: rsc2:1 allocation score on node1: -INFINITY + pcmk__group_assign: rsc2:1 allocation score on node1: 0 +-pcmk__group_assign: rsc2:1 allocation score on node2: 0 +-pcmk__group_assign: rsc2:1 allocation score on node2: 0 ++pcmk__group_assign: rsc2:1 allocation score on node2: -INFINITY ++pcmk__group_assign: rsc2:1 allocation score on node2: -INFINITY + pcmk__group_assign: rsc2:1 allocation score on node3: 1 + pcmk__group_assign: rsc2:1 allocation score on node3: 1 +-pcmk__group_assign: rsc2:2 allocation score on node1: -INFINITY +-pcmk__group_assign: rsc2:2 allocation score on node2: 0 ++pcmk__group_assign: rsc2:2 allocation score on node1: 0 ++pcmk__group_assign: rsc2:2 allocation score on node2: -INFINITY + pcmk__group_assign: rsc2:2 allocation score on node3: -INFINITY + pcmk__primitive_assign: Fencing allocation score on node1: 0 + pcmk__primitive_assign: Fencing allocation score on node2: 0 + pcmk__primitive_assign: Fencing allocation score on node3: 0 +-pcmk__primitive_assign: rsc1:0 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:0 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc1:0 allocation score on node1: 100 + 
pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 + pcmk__primitive_assign: rsc1:0 allocation score on node2: 2 +@@ -85,17 +85,17 @@ pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 + pcmk__primitive_assign: rsc1:0 allocation score on node3: 0 + pcmk__primitive_assign: rsc1:1 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc1:1 allocation score on node1: 100 +-pcmk__primitive_assign: rsc1:1 allocation score on node2: 0 +-pcmk__primitive_assign: rsc1:1 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:1 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc1:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 + pcmk__primitive_assign: rsc1:1 allocation score on node3: 2 +-pcmk__primitive_assign: rsc1:2 allocation score on node1: -INFINITY +-pcmk__primitive_assign: rsc1:2 allocation score on node2: 0 ++pcmk__primitive_assign: rsc1:2 allocation score on node1: 100 ++pcmk__primitive_assign: rsc1:2 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc1:2 allocation score on node3: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node1: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node1: 0 +-pcmk__primitive_assign: rsc2:0 allocation score on node1: 0 +-pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY ++pcmk__primitive_assign: rsc2:0 allocation score on node2: 1 + pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node3: -INFINITY + pcmk__primitive_assign: rsc2:1 allocation score on node1: -INFINITY +@@ -104,6 +104,6 @@ pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:1 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:1 allocation score on node3: -INFINITY + pcmk__primitive_assign: rsc2:1 
allocation score on node3: 1 +-pcmk__primitive_assign: rsc2:2 allocation score on node1: -INFINITY +-pcmk__primitive_assign: rsc2:2 allocation score on node2: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node1: 0 ++pcmk__primitive_assign: rsc2:2 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-6.scores b/cts/scheduler/scores/clone-recover-no-shuffle-6.scores +index 643e30f9d18..f1f300cbd66 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-6.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-6.scores +@@ -41,16 +41,16 @@ pcmk__clone_assign: base:2 allocation score on base-bundle-2: INFINITY + pcmk__primitive_assign: Fencing allocation score on node1: 0 + pcmk__primitive_assign: Fencing allocation score on node2: 0 + pcmk__primitive_assign: Fencing allocation score on node3: 0 +-pcmk__primitive_assign: base-bundle-0 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-0 allocation score on node1: 0 + pcmk__primitive_assign: base-bundle-0 allocation score on node2: 0 +-pcmk__primitive_assign: base-bundle-0 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-0 allocation score on node3: 10000 + pcmk__primitive_assign: base-bundle-1 allocation score on node1: 0 +-pcmk__primitive_assign: base-bundle-1 allocation score on node2: 0 +-pcmk__primitive_assign: base-bundle-1 allocation score on node3: 10000 +-pcmk__primitive_assign: base-bundle-2 allocation score on node1: 0 +-pcmk__primitive_assign: base-bundle-2 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node2: 10000 ++pcmk__primitive_assign: base-bundle-1 allocation score on node3: 0 ++pcmk__primitive_assign: base-bundle-2 allocation score on node1: 10000 ++pcmk__primitive_assign: base-bundle-2 allocation score on node2: 0 + pcmk__primitive_assign: base-bundle-2 allocation score on node3: 0 
+-pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 100 ++pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: -INFINITY + pcmk__primitive_assign: base-bundle-podman-0 allocation score on node1: 100 + pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 + pcmk__primitive_assign: base-bundle-podman-0 allocation score on node2: 0 +@@ -60,10 +60,10 @@ pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: -INFINIT + pcmk__primitive_assign: base-bundle-podman-1 allocation score on node1: 100 + pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 + pcmk__primitive_assign: base-bundle-podman-1 allocation score on node2: 0 +-pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: 0 +-pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: 0 +-pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: -INFINITY +-pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: 0 ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node1: 100 ++pcmk__primitive_assign: base-bundle-podman-2 allocation score on node2: -INFINITY + pcmk__primitive_assign: base-bundle-podman-2 allocation score on node3: -INFINITY + pcmk__primitive_assign: base:0 allocation score on base-bundle-0: INFINITY + pcmk__primitive_assign: base:1 allocation score on base-bundle-1: INFINITY +diff --git a/cts/scheduler/scores/clone-recover-no-shuffle-7.scores b/cts/scheduler/scores/clone-recover-no-shuffle-7.scores +index fc45bf740fd..503cbb3addf 100644 +--- a/cts/scheduler/scores/clone-recover-no-shuffle-7.scores ++++ b/cts/scheduler/scores/clone-recover-no-shuffle-7.scores +@@ -1,7 +1,7 @@ + +-dummy:0 promotion score on node1: 15 ++dummy:0 promotion score on node3: 5 + dummy:1 
promotion score on node2: 10 +-dummy:2 promotion score on node3: 5 ++dummy:2 promotion score on node1: 15 + pcmk__clone_assign: dummy-clone allocation score on node1: 0 + pcmk__clone_assign: dummy-clone allocation score on node2: 0 + pcmk__clone_assign: dummy-clone allocation score on node3: 0 +@@ -17,7 +17,7 @@ pcmk__clone_assign: dummy:2 allocation score on node3: 5 + pcmk__primitive_assign: Fencing allocation score on node1: 0 + pcmk__primitive_assign: Fencing allocation score on node2: 0 + pcmk__primitive_assign: Fencing allocation score on node3: 0 +-pcmk__primitive_assign: dummy:0 allocation score on node1: 15 ++pcmk__primitive_assign: dummy:0 allocation score on node1: -INFINITY + pcmk__primitive_assign: dummy:0 allocation score on node1: 15 + pcmk__primitive_assign: dummy:0 allocation score on node2: 0 + pcmk__primitive_assign: dummy:0 allocation score on node2: 0 +@@ -27,8 +27,8 @@ pcmk__primitive_assign: dummy:1 allocation score on node1: -INFINITY + pcmk__primitive_assign: dummy:1 allocation score on node1: 15 + pcmk__primitive_assign: dummy:1 allocation score on node2: 11 + pcmk__primitive_assign: dummy:1 allocation score on node2: 11 +-pcmk__primitive_assign: dummy:1 allocation score on node3: 0 +-pcmk__primitive_assign: dummy:1 allocation score on node3: 0 +-pcmk__primitive_assign: dummy:2 allocation score on node1: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: dummy:1 allocation score on node3: -INFINITY ++pcmk__primitive_assign: dummy:2 allocation score on node1: 15 + pcmk__primitive_assign: dummy:2 allocation score on node2: -INFINITY +-pcmk__primitive_assign: dummy:2 allocation score on node3: 5 ++pcmk__primitive_assign: dummy:2 allocation score on node3: -INFINITY +diff --git a/cts/scheduler/scores/promoted-failed-demote-2.scores b/cts/scheduler/scores/promoted-failed-demote-2.scores +index e457d8c6057..39399d9eac4 100644 +--- a/cts/scheduler/scores/promoted-failed-demote-2.scores ++++ 
b/cts/scheduler/scores/promoted-failed-demote-2.scores +@@ -34,14 +34,10 @@ pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5b: -INFINITY +diff --git a/cts/scheduler/scores/promoted-failed-demote.scores b/cts/scheduler/scores/promoted-failed-demote.scores +index e457d8c6057..39399d9eac4 100644 +--- a/cts/scheduler/scores/promoted-failed-demote.scores ++++ b/cts/scheduler/scores/promoted-failed-demote.scores +@@ -34,14 +34,10 @@ pcmk__group_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__group_assign: stateful-2:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: 
stateful-1:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-1:1 allocation score on dl380g5b: 0 + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5a: -INFINITY +-pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:0 allocation score on dl380g5b: -INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5a: INFINITY + pcmk__primitive_assign: stateful-2:1 allocation score on dl380g5b: -INFINITY +diff --git a/cts/scheduler/scores/utilization-complex.scores b/cts/scheduler/scores/utilization-complex.scores +index 29bc92c193f..b9dd80c4b6a 100644 +--- a/cts/scheduler/scores/utilization-complex.scores ++++ b/cts/scheduler/scores/utilization-complex.scores +@@ -312,18 +312,26 @@ pcmk__primitive_assign: clone1:2 allocation score on rhel8-4: 1 + pcmk__primitive_assign: clone1:2 allocation score on rhel8-5: 0 + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-0: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-0: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-0: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-2: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-2: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on httpd-bundle-2: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: 
clone1:3 allocation score on rhel8-1: 0 + pcmk__primitive_assign: clone1:3 allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: clone1:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: clone1:3 allocation score on rhel8-5: 1 + pcmk__primitive_assign: clone1:3 allocation score on rhel8-5: 1 + pcmk__primitive_assign: clone1:3 allocation score on rhel8-5: 1 + pcmk__primitive_assign: clone1:4 allocation score on httpd-bundle-0: -INFINITY +@@ -384,18 +392,26 @@ pcmk__primitive_assign: clone2:2 allocation score on rhel8-4: 1 + pcmk__primitive_assign: clone2:2 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-0: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-0: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-0: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-1: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-1: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-2: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-2: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on httpd-bundle-2: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-1: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation 
score on rhel8-1: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-1: 0 + pcmk__primitive_assign: clone2:3 allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: clone2:3 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: clone2:3 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: clone2:4 allocation score on httpd-bundle-0: -INFINITY +@@ -535,18 +551,26 @@ pcmk__primitive_assign: httpd-bundle-ip-192.168.122.133 allocation score on rhel + pcmk__primitive_assign: httpd-bundle-ip-192.168.122.133 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-0: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-0: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-0: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-2: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on 
httpd-bundle-2: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on httpd-bundle-2: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-1: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-2: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-2: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-3: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-3: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-4: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-4: -INFINITY ++pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-5: -INFINITY + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-5: 0 + pcmk__primitive_assign: httpd-bundle-podman-0 allocation score on rhel8-5: 0 + pcmk__primitive_assign: httpd-bundle-podman-1 allocation score on httpd-bundle-1: -INFINITY +diff --git a/cts/scheduler/scores/utilization-order2.scores b/cts/scheduler/scores/utilization-order2.scores +index c4b49d9b366..4476b60ee21 100644 +--- a/cts/scheduler/scores/utilization-order2.scores ++++ b/cts/scheduler/scores/utilization-order2.scores +@@ -9,6 +9,8 @@ pcmk__primitive_assign: rsc1 allocation score on node1: 0 + pcmk__primitive_assign: rsc1 allocation score on node2: 0 + pcmk__primitive_assign: rsc2:0 allocation score on node1: 1 + pcmk__primitive_assign: rsc2:0 
allocation score on node1: 1 ++pcmk__primitive_assign: rsc2:0 allocation score on node1: 1 ++pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node2: -INFINITY + pcmk__primitive_assign: rsc2:0 allocation score on node2: 0 + pcmk__primitive_assign: rsc2:1 allocation score on node1: 0 +diff --git a/cts/scheduler/summary/cancel-behind-moving-remote.summary b/cts/scheduler/summary/cancel-behind-moving-remote.summary +index 7726876f949..58de340318b 100644 +--- a/cts/scheduler/summary/cancel-behind-moving-remote.summary ++++ b/cts/scheduler/summary/cancel-behind-moving-remote.summary +@@ -58,22 +58,17 @@ Current cluster status: + Transition Summary: + * Start rabbitmq-bundle-1 ( controller-0 ) due to unrunnable rabbitmq-bundle-podman-1 start (blocked) + * Start rabbitmq:1 ( rabbitmq-bundle-1 ) due to unrunnable rabbitmq-bundle-podman-1 start (blocked) +- * Start ovn-dbs-bundle-podman-0 ( controller-2 ) +- * Start ovn-dbs-bundle-0 ( controller-2 ) ++ * Start ovn-dbs-bundle-podman-0 ( controller-0 ) ++ * Start ovn-dbs-bundle-0 ( controller-0 ) + * Start ovndb_servers:0 ( ovn-dbs-bundle-0 ) +- * Move ovn-dbs-bundle-podman-1 ( controller-2 -> controller-0 ) +- * Move ovn-dbs-bundle-1 ( controller-2 -> controller-0 ) +- * Restart ovndb_servers:1 ( Unpromoted -> Promoted ovn-dbs-bundle-1 ) due to required ovn-dbs-bundle-podman-1 start +- * Start ip-172.17.1.87 ( controller-0 ) ++ * Promote ovndb_servers:1 ( Unpromoted -> Promoted ovn-dbs-bundle-1 ) + * Move stonith-fence_ipmilan-52540040bb56 ( messaging-2 -> database-0 ) + * Move stonith-fence_ipmilan-525400e1534e ( database-1 -> messaging-2 ) + + Executing Cluster Transition: + * Pseudo action: rabbitmq-bundle-clone_pre_notify_start_0 + * Resource action: ovndb_servers cancel=30000 on ovn-dbs-bundle-1 +- * Pseudo action: ovn-dbs-bundle-master_pre_notify_stop_0 +- * Cluster action: clear_failcount for ovn-dbs-bundle-0 on controller-0 +- * Cluster action: 
clear_failcount for ovn-dbs-bundle-1 on controller-2 ++ * Pseudo action: ovn-dbs-bundle-master_pre_notify_start_0 + * Cluster action: clear_failcount for stonith-fence_compute-fence-nova on messaging-0 + * Cluster action: clear_failcount for nova-evacuate on messaging-0 + * Cluster action: clear_failcount for stonith-fence_ipmilan-525400aa1373 on database-0 +@@ -87,52 +82,34 @@ Executing Cluster Transition: + * Cluster action: clear_failcount for stonith-fence_ipmilan-52540060dbba on messaging-0 + * Cluster action: clear_failcount for stonith-fence_ipmilan-525400e018b6 on database-0 + * Cluster action: clear_failcount for stonith-fence_ipmilan-525400c87cdb on database-2 +- * Pseudo action: ovn-dbs-bundle_stop_0 ++ * Pseudo action: ovn-dbs-bundle_start_0 + * Pseudo action: rabbitmq-bundle_start_0 + * Pseudo action: rabbitmq-bundle-clone_confirmed-pre_notify_start_0 + * Pseudo action: rabbitmq-bundle-clone_start_0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 +- * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_stop_0 +- * Pseudo action: ovn-dbs-bundle-master_stop_0 ++ * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_start_0 ++ * Pseudo action: ovn-dbs-bundle-master_start_0 ++ * Resource action: ovn-dbs-bundle-podman-0 start on controller-0 ++ * Resource action: ovn-dbs-bundle-0 start on controller-0 + * Resource action: stonith-fence_ipmilan-52540040bb56 start on database-0 + * Resource action: stonith-fence_ipmilan-525400e1534e start on messaging-2 + * Pseudo action: rabbitmq-bundle-clone_running_0 +- * Resource action: ovndb_servers stop on ovn-dbs-bundle-1 +- * Pseudo action: ovn-dbs-bundle-master_stopped_0 +- * Resource action: ovn-dbs-bundle-1 stop on controller-2 ++ * Resource action: ovndb_servers start on ovn-dbs-bundle-0 ++ * Pseudo action: ovn-dbs-bundle-master_running_0 ++ * Resource action: ovn-dbs-bundle-podman-0 monitor=60000 on controller-0 ++ * Resource 
action: ovn-dbs-bundle-0 monitor=30000 on controller-0 + * Resource action: stonith-fence_ipmilan-52540040bb56 monitor=60000 on database-0 + * Resource action: stonith-fence_ipmilan-525400e1534e monitor=60000 on messaging-2 + * Pseudo action: rabbitmq-bundle-clone_post_notify_running_0 +- * Pseudo action: ovn-dbs-bundle-master_post_notify_stopped_0 +- * Resource action: ovn-dbs-bundle-podman-1 stop on controller-2 +- * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_running_0 +- * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 +- * Pseudo action: ovn-dbs-bundle-master_confirmed-post_notify_stopped_0 +- * Pseudo action: ovn-dbs-bundle-master_pre_notify_start_0 +- * Pseudo action: ovn-dbs-bundle_stopped_0 +- * Pseudo action: ovn-dbs-bundle_start_0 +- * Pseudo action: rabbitmq-bundle_running_0 +- * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 +- * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_start_0 +- * Pseudo action: ovn-dbs-bundle-master_start_0 +- * Resource action: ovn-dbs-bundle-podman-0 start on controller-2 +- * Resource action: ovn-dbs-bundle-0 start on controller-2 +- * Resource action: ovn-dbs-bundle-podman-1 start on controller-0 +- * Resource action: ovn-dbs-bundle-1 start on controller-0 +- * Resource action: ovndb_servers start on ovn-dbs-bundle-0 +- * Resource action: ovndb_servers start on ovn-dbs-bundle-1 +- * Pseudo action: ovn-dbs-bundle-master_running_0 +- * Resource action: ovn-dbs-bundle-podman-0 monitor=60000 on controller-2 +- * Resource action: ovn-dbs-bundle-0 monitor=30000 on controller-2 +- * Resource action: ovn-dbs-bundle-podman-1 monitor=60000 on controller-0 +- * Resource action: ovn-dbs-bundle-1 monitor=30000 on controller-0 + * Pseudo action: ovn-dbs-bundle-master_post_notify_running_0 ++ * Pseudo action: rabbitmq-bundle-clone_confirmed-post_notify_running_0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 + * 
Resource action: ovndb_servers notify on ovn-dbs-bundle-2 + * Pseudo action: ovn-dbs-bundle-master_confirmed-post_notify_running_0 + * Pseudo action: ovn-dbs-bundle_running_0 ++ * Pseudo action: rabbitmq-bundle_running_0 + * Pseudo action: ovn-dbs-bundle-master_pre_notify_promote_0 + * Pseudo action: ovn-dbs-bundle_promote_0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-0 +@@ -140,10 +117,8 @@ Executing Cluster Transition: + * Resource action: ovndb_servers notify on ovn-dbs-bundle-2 + * Pseudo action: ovn-dbs-bundle-master_confirmed-pre_notify_promote_0 + * Pseudo action: ovn-dbs-bundle-master_promote_0 +- * Resource action: ip-172.17.1.87 start on controller-0 + * Resource action: ovndb_servers promote on ovn-dbs-bundle-1 + * Pseudo action: ovn-dbs-bundle-master_promoted_0 +- * Resource action: ip-172.17.1.87 monitor=10000 on controller-0 + * Pseudo action: ovn-dbs-bundle-master_post_notify_promoted_0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-0 + * Resource action: ovndb_servers notify on ovn-dbs-bundle-1 +@@ -187,10 +162,10 @@ Revised Cluster Status: + * haproxy-bundle-podman-1 (ocf:heartbeat:podman): Started controller-0 + * haproxy-bundle-podman-2 (ocf:heartbeat:podman): Started controller-1 + * Container bundle set: ovn-dbs-bundle [cluster.common.tag/rhosp16-openstack-ovn-northd:pcmklatest]: +- * ovn-dbs-bundle-0 (ocf:ovn:ovndb-servers): Unpromoted controller-2 +- * ovn-dbs-bundle-1 (ocf:ovn:ovndb-servers): Promoted controller-0 ++ * ovn-dbs-bundle-0 (ocf:ovn:ovndb-servers): Unpromoted controller-0 ++ * ovn-dbs-bundle-1 (ocf:ovn:ovndb-servers): Promoted controller-2 + * ovn-dbs-bundle-2 (ocf:ovn:ovndb-servers): Unpromoted controller-1 +- * ip-172.17.1.87 (ocf:heartbeat:IPaddr2): Started controller-0 ++ * ip-172.17.1.87 (ocf:heartbeat:IPaddr2): Stopped + * stonith-fence_compute-fence-nova (stonith:fence_compute): Started database-1 + * Clone Set: compute-unfence-trigger-clone [compute-unfence-trigger]: + * Started: [ compute-0 
compute-1 ] +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-4.summary b/cts/scheduler/summary/clone-recover-no-shuffle-4.summary +index 944bcb834b3..0b6866ec16c 100644 +--- a/cts/scheduler/summary/clone-recover-no-shuffle-4.summary ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-4.summary +@@ -10,19 +10,13 @@ Current cluster status: + * Stopped: [ node1 ] + + Transition Summary: +- * Move dummy:0 ( node2 -> node1 ) +- * Start dummy:2 ( node2 ) ++ * Start dummy:2 ( node1 ) + + Executing Cluster Transition: +- * Pseudo action: dummy-clone_stop_0 +- * Resource action: dummy stop on node2 +- * Pseudo action: dummy-clone_stopped_0 + * Pseudo action: dummy-clone_start_0 + * Resource action: dummy start on node1 +- * Resource action: dummy start on node2 + * Pseudo action: dummy-clone_running_0 + * Resource action: dummy monitor=10000 on node1 +- * Resource action: dummy monitor=10000 on node2 + Using the original execution date of: 2023-06-21 00:59:59Z + + Revised Cluster Status: +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-5.summary b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +index 121214c42ab..8b18120ad8d 100644 +--- a/cts/scheduler/summary/clone-recover-no-shuffle-5.summary ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-5.summary +@@ -9,31 +9,17 @@ Current cluster status: + * Stopped: [ node1 ] + + Transition Summary: +- * Move rsc1:0 ( node2 -> node1 ) +- * Move rsc2:0 ( node2 -> node1 ) +- * Start rsc1:2 ( node2 ) +- * Start rsc2:2 ( node2 ) ++ * Start rsc1:2 ( node1 ) ++ * Start rsc2:2 ( node1 ) + + Executing Cluster Transition: +- * Pseudo action: grp-clone_stop_0 +- * Pseudo action: grp:0_stop_0 +- * Resource action: rsc2 stop on node2 +- * Resource action: rsc1 stop on node2 +- * Pseudo action: grp:0_stopped_0 +- * Pseudo action: grp-clone_stopped_0 + * Pseudo action: grp-clone_start_0 +- * Pseudo action: grp:0_start_0 ++ * Pseudo action: grp:2_start_0 + * Resource action: rsc1 start on node1 + * Resource 
action: rsc2 start on node1 +- * Pseudo action: grp:2_start_0 +- * Resource action: rsc1 start on node2 +- * Resource action: rsc2 start on node2 +- * Pseudo action: grp:0_running_0 ++ * Pseudo action: grp:2_running_0 + * Resource action: rsc1 monitor=10000 on node1 + * Resource action: rsc2 monitor=10000 on node1 +- * Pseudo action: grp:2_running_0 +- * Resource action: rsc1 monitor=10000 on node2 +- * Resource action: rsc2 monitor=10000 on node2 + * Pseudo action: grp-clone_running_0 + + Revised Cluster Status: +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-6.summary b/cts/scheduler/summary/clone-recover-no-shuffle-6.summary +index 19a957e15fb..5702177e33d 100644 +--- a/cts/scheduler/summary/clone-recover-no-shuffle-6.summary ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-6.summary +@@ -11,48 +11,22 @@ Current cluster status: + * base-bundle-2 (ocf:pacemaker:Stateful): Stopped + + Transition Summary: +- * Move base-bundle-podman-0 ( node3 -> node1 ) +- * Move base-bundle-0 ( node3 -> node1 ) +- * Restart base:0 ( base-bundle-0 ) due to required base-bundle-podman-0 start +- * Move base-bundle-podman-1 ( node2 -> node3 ) +- * Move base-bundle-1 ( node2 -> node3 ) +- * Restart base:1 ( base-bundle-1 ) due to required base-bundle-podman-1 start +- * Start base-bundle-podman-2 ( node2 ) +- * Start base-bundle-2 ( node2 ) +- * Start base:2 ( base-bundle-2 ) ++ * Start base-bundle-podman-2 ( node1 ) ++ * Start base-bundle-2 ( node1 ) ++ * Start base:2 ( base-bundle-2 ) + + Executing Cluster Transition: +- * Pseudo action: base-bundle_stop_0 + * Pseudo action: base-bundle_start_0 +- * Pseudo action: base-bundle-clone_stop_0 +- * Resource action: base-bundle-podman-2 start on node2 ++ * Pseudo action: base-bundle-clone_start_0 ++ * Resource action: base-bundle-podman-2 start on node1 + * Resource action: base-bundle-2 monitor on node3 + * Resource action: base-bundle-2 monitor on node2 + * Resource action: base-bundle-2 monitor on node1 +- * Resource 
action: base stop on base-bundle-1 +- * Resource action: base-bundle-1 stop on node2 +- * Resource action: base-bundle-podman-2 monitor=60000 on node2 +- * Resource action: base-bundle-2 start on node2 +- * Resource action: base stop on base-bundle-0 +- * Pseudo action: base-bundle-clone_stopped_0 +- * Pseudo action: base-bundle-clone_start_0 +- * Resource action: base-bundle-0 stop on node3 +- * Resource action: base-bundle-podman-1 stop on node2 +- * Resource action: base-bundle-2 monitor=30000 on node2 +- * Resource action: base-bundle-podman-0 stop on node3 +- * Resource action: base-bundle-podman-1 start on node3 +- * Resource action: base-bundle-1 start on node3 +- * Pseudo action: base-bundle_stopped_0 +- * Resource action: base-bundle-podman-0 start on node1 +- * Resource action: base-bundle-0 start on node1 +- * Resource action: base-bundle-podman-1 monitor=60000 on node3 +- * Resource action: base-bundle-1 monitor=30000 on node3 +- * Resource action: base start on base-bundle-0 +- * Resource action: base start on base-bundle-1 ++ * Resource action: base-bundle-podman-2 monitor=60000 on node1 ++ * Resource action: base-bundle-2 start on node1 + * Resource action: base start on base-bundle-2 + * Pseudo action: base-bundle-clone_running_0 +- * Resource action: base-bundle-podman-0 monitor=60000 on node1 +- * Resource action: base-bundle-0 monitor=30000 on node1 ++ * Resource action: base-bundle-2 monitor=30000 on node1 + * Pseudo action: base-bundle_running_0 + + Revised Cluster Status: +@@ -63,6 +37,6 @@ Revised Cluster Status: + * Full List of Resources: + * Fencing (stonith:fence_xvm): Started node2 + * Container bundle set: base-bundle [localhost/pcmktest]: +- * base-bundle-0 (ocf:pacemaker:Stateful): Started node1 +- * base-bundle-1 (ocf:pacemaker:Stateful): Started node3 +- * base-bundle-2 (ocf:pacemaker:Stateful): Started node2 ++ * base-bundle-0 (ocf:pacemaker:Stateful): Started node3 ++ * base-bundle-1 (ocf:pacemaker:Stateful): Started node2 ++ * 
base-bundle-2 (ocf:pacemaker:Stateful): Started node1 +diff --git a/cts/scheduler/summary/clone-recover-no-shuffle-7.summary b/cts/scheduler/summary/clone-recover-no-shuffle-7.summary +index e6c9baed0db..77445700f04 100644 +--- a/cts/scheduler/summary/clone-recover-no-shuffle-7.summary ++++ b/cts/scheduler/summary/clone-recover-no-shuffle-7.summary +@@ -10,24 +10,18 @@ Current cluster status: + * Stopped: [ node1 ] + + Transition Summary: +- * Move dummy:0 ( Unpromoted node3 -> Promoted node1 ) +- * Demote dummy:1 ( Promoted -> Unpromoted node2 ) +- * Start dummy:2 ( node3 ) ++ * Demote dummy:1 ( Promoted -> Unpromoted node2 ) ++ * Promote dummy:2 ( Stopped -> Promoted node1 ) + + Executing Cluster Transition: + * Resource action: dummy cancel=10000 on node2 + * Pseudo action: dummy-clone_demote_0 + * Resource action: dummy demote on node2 + * Pseudo action: dummy-clone_demoted_0 +- * Pseudo action: dummy-clone_stop_0 +- * Resource action: dummy stop on node3 +- * Resource action: dummy monitor=11000 on node2 +- * Pseudo action: dummy-clone_stopped_0 + * Pseudo action: dummy-clone_start_0 ++ * Resource action: dummy monitor=11000 on node2 + * Resource action: dummy start on node1 +- * Resource action: dummy start on node3 + * Pseudo action: dummy-clone_running_0 +- * Resource action: dummy monitor=11000 on node3 + * Pseudo action: dummy-clone_promote_0 + * Resource action: dummy promote on node1 + * Pseudo action: dummy-clone_promoted_0 +diff --git a/cts/scheduler/xml/cancel-behind-moving-remote.xml b/cts/scheduler/xml/cancel-behind-moving-remote.xml +index 67e14300ba8..7b880602b1b 100644 +--- a/cts/scheduler/xml/cancel-behind-moving-remote.xml ++++ b/cts/scheduler/xml/cancel-behind-moving-remote.xml +@@ -1,5 +1,19 @@ + + ++ + + + +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-4.xml b/cts/scheduler/xml/clone-recover-no-shuffle-4.xml +index 40e6520c6d0..f0a5feb8c2f 100644 +--- a/cts/scheduler/xml/clone-recover-no-shuffle-4.xml ++++ 
b/cts/scheduler/xml/clone-recover-no-shuffle-4.xml +@@ -11,11 +11,6 @@ + * Instance dummy:0 should remain started on node2 + * Instance dummy:1 should remain started on node3 + * Instance dummy:2 should start on node1 +- +- This test output is incorrect: +- * Instance dummy:0 moves from node2 to node1 +- * Instance dummy:1 remains started on node3 (correct) +- * Instance dummy:2 starts on node2 + --> + + +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-5.xml b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +index 45f3b5a9f3a..95e5eca9c9d 100644 +--- a/cts/scheduler/xml/clone-recover-no-shuffle-5.xml ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-5.xml +@@ -12,11 +12,6 @@ + * Instance grp:0 should remain started on node2 + * Instance grp:1 should remain started on node3 + * Instance grp:2 should start on node1 +- +- This test output is incorrect: +- * Instance grp:0 moves to node1 +- * Instance grp:1 remains started on node3 (correct) +- * Instance grp:2 starts on node2 + --> + + +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-6.xml b/cts/scheduler/xml/clone-recover-no-shuffle-6.xml +index 3de42f581d4..64bb4d90179 100644 +--- a/cts/scheduler/xml/clone-recover-no-shuffle-6.xml ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-6.xml +@@ -12,11 +12,6 @@ + * Instance base:0 should remain started on node3 + * Instance base:1 should remain started on node2 + * Instance base:2 should start on node1 +- +- This test output is incorrect: +- * Instance base:0 moves from node3 to node1 +- * Instance base:1 moves from node2 to node3 +- * Instance base:2 starts on node2 + --> + + +diff --git a/cts/scheduler/xml/clone-recover-no-shuffle-7.xml b/cts/scheduler/xml/clone-recover-no-shuffle-7.xml +index 6e9dad50db4..e588b811d77 100644 +--- a/cts/scheduler/xml/clone-recover-no-shuffle-7.xml ++++ b/cts/scheduler/xml/clone-recover-no-shuffle-7.xml +@@ -11,11 +11,6 @@ + * Instance dummy:0 should remain started (unpromoted) on node3 + * Instance dummy:1 should demote 
on node2 + * Instance dummy:2 should promote on node1 +- +- This test output is incorrect: +- * Instance dummy:0 moves from unpromoted on node3 to promoted on node1 +- * Instance dummy:1 demotes on node2 +- * Instance dummy:2 starts on node3 + --> + + diff --git a/SOURCES/003-history-cleanup.patch b/SOURCES/003-history-cleanup.patch deleted file mode 100644 index 87a3e27..0000000 --- a/SOURCES/003-history-cleanup.patch +++ /dev/null @@ -1,2829 +0,0 @@ -From e953591a9796edebd4796c344df0eddcbc7a2dff Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 30 Jan 2023 16:34:32 -0600 -Subject: [PATCH 01/14] Refactor: scheduler: drop unneeded arguments from - process_rsc_state() - -migrate_op has been unused since at least 2011 ---- - lib/pengine/unpack.c | 36 +++++++++++++++--------------------- - 1 file changed, 15 insertions(+), 21 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 5fcba3b..9524def 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -1963,8 +1963,7 @@ process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t - - static void - process_rsc_state(pe_resource_t * rsc, pe_node_t * node, -- enum action_fail_response on_fail, -- xmlNode * migrate_op, pe_working_set_t * data_set) -+ enum action_fail_response on_fail) - { - pe_node_t *tmpnode = NULL; - char *reason = NULL; -@@ -2016,7 +2015,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - should_fence = TRUE; - -- } else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { -+ } else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { - if (pe__is_remote_node(node) && node->details->remote_rsc - && !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) { - -@@ -2039,7 +2038,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - if (reason == NULL) { - reason = crm_strdup_printf("%s is thought to be active there", rsc->id); - } -- 
pe_fence_node(data_set, node, reason, FALSE); -+ pe_fence_node(rsc->cluster, node, reason, FALSE); - } - free(reason); - } -@@ -2069,7 +2068,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * but also mark the node as unclean - */ - reason = crm_strdup_printf("%s failed there", rsc->id); -- pe_fence_node(data_set, node, reason, FALSE); -+ pe_fence_node(rsc->cluster, node, reason, FALSE); - free(reason); - break; - -@@ -2090,7 +2089,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - /* make sure it comes up somewhere else - * or not at all - */ -- resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); -+ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", -+ rsc->cluster); - break; - - case action_fail_stop: -@@ -2112,8 +2112,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * container is running yet, so remember it and add a stop - * action for it later. - */ -- data_set->stop_needed = g_list_prepend(data_set->stop_needed, -- rsc->container); -+ rsc->cluster->stop_needed = -+ g_list_prepend(rsc->cluster->stop_needed, rsc->container); - } else if (rsc->container) { - stop_action(rsc->container, node, FALSE); - } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { -@@ -2123,10 +2123,10 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - - case action_fail_reset_remote: - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -- if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { -+ if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) { - tmpnode = NULL; - if (rsc->is_remote_node) { -- tmpnode = pe_find_node(data_set->nodes, rsc->id); -+ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); - } - if (tmpnode && - pe__is_remote_node(tmpnode) && -@@ -2135,7 +2135,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - /* The remote connection resource failed in a way that - * should result in fencing the remote node. 
- */ -- pe_fence_node(data_set, tmpnode, -+ pe_fence_node(rsc->cluster, tmpnode, - "remote connection is unrecoverable", FALSE); - } - } -@@ -2158,7 +2158,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - * result in a fencing operation regardless if we're going to attempt to - * reconnect to the remote-node in this transition or not. */ - if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { -- tmpnode = pe_find_node(data_set->nodes, rsc->id); -+ tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); - if (tmpnode && tmpnode->details->unclean) { - tmpnode->details->unseen = FALSE; - } -@@ -2177,7 +2177,8 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node, - } - } - -- native_add_running(rsc, node, data_set, (save_on_fail != action_fail_ignore)); -+ native_add_running(rsc, node, rsc->cluster, -+ (save_on_fail != action_fail_ignore)); - switch (on_fail) { - case action_fail_ignore: - break; -@@ -2376,14 +2377,12 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - int start_index = -1; - enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; - -- const char *task = NULL; - const char *rsc_id = ID(lrm_resource); - - pe_resource_t *rsc = NULL; - GList *op_list = NULL; - GList *sorted_op_list = NULL; - -- xmlNode *migrate_op = NULL; - xmlNode *rsc_op = NULL; - xmlNode *last_failure = NULL; - -@@ -2437,11 +2436,6 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { - xmlNode *rsc_op = (xmlNode *) gIter->data; - -- task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); -- if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) { -- migrate_op = rsc_op; -- } -- - unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); - } - -@@ -2452,7 +2446,7 @@ unpack_lrm_resource(pe_node_t *node, xmlNode *lrm_resource, - /* no need to free the contents */ - g_list_free(sorted_op_list); - -- process_rsc_state(rsc, node, on_fail, migrate_op, data_set); -+ 
process_rsc_state(rsc, node, on_fail); - - if (get_target_role(rsc, &req_role)) { - if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { --- -2.31.1 - -From 6f4e34cccc4864961d2020a2dd547450ac53a44e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 16:30:20 -0600 -Subject: [PATCH 02/14] Log: scheduler: improve trace logs when unpacking - resource history - ---- - lib/pengine/unpack.c | 112 +++++++++++++++++++++++++++---------------- - 1 file changed, 71 insertions(+), 41 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 9524def..b7b2873 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -3363,6 +3363,24 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - pe__set_resource_flags(rsc, pe_rsc_block); - } - -+/*! -+ * \internal -+ * \brief Update an integer value and why -+ * -+ * \param[in,out] i Pointer to integer to update -+ * \param[in,out] why Where to store reason for update -+ * \param[in] value New value -+ * \param[in,out] reason Description of why value was changed -+ */ -+static inline void -+remap_because(int *i, const char **why, int value, const char *reason) -+{ -+ if (*i != value) { -+ *i = value; -+ *why = reason; -+ } -+} -+ - /*! 
- * \internal - * \brief Remap informational monitor results and operation status -@@ -3393,29 +3411,34 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - static void - remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - pe_working_set_t *data_set, enum action_fail_response *on_fail, -- int target_rc, int *rc, int *status) { -+ int target_rc, int *rc, int *status) -+{ - bool is_probe = false; -+ int orig_exit_status = *rc; -+ int orig_exec_status = *status; -+ const char *why = NULL; - const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); - const char *key = get_op_key(xml_op); - const char *exit_reason = crm_element_value(xml_op, - XML_LRM_ATTR_EXIT_REASON); - - if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_none)) { -- int remapped_rc = pcmk__effective_rc(*rc); -- -- if (*rc != remapped_rc) { -- crm_trace("Remapping monitor result %d to %d", *rc, remapped_rc); -+ // Remap degraded results to their usual counterparts -+ *rc = pcmk__effective_rc(*rc); -+ if (*rc != orig_exit_status) { -+ why = "degraded monitor result"; - if (!node->details->shutdown || node->details->online) { - record_failed_op(xml_op, node, rsc, data_set); - } -- -- *rc = remapped_rc; - } - } - - if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { -- *status = PCMK_EXEC_DONE; -- *rc = PCMK_OCF_NOT_RUNNING; -+ if ((*status != PCMK_EXEC_DONE) || (*rc != PCMK_OCF_NOT_RUNNING)) { -+ *status = PCMK_EXEC_DONE; -+ *rc = PCMK_OCF_NOT_RUNNING; -+ why = "irrelevant probe result"; -+ } - } - - /* If the executor reported an operation status of anything but done or -@@ -3423,22 +3446,19 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - * it should be treated as a failure or not, because we know the expected - * result. 
- */ -- if (*status != PCMK_EXEC_DONE && *status != PCMK_EXEC_ERROR) { -- return; -+ switch (*status) { -+ case PCMK_EXEC_DONE: -+ case PCMK_EXEC_ERROR: -+ break; -+ default: -+ goto remap_done; - } - -- CRM_ASSERT(rsc); -- CRM_CHECK(task != NULL, -- *status = PCMK_EXEC_ERROR; return); -- -- *status = PCMK_EXEC_DONE; -- - if (exit_reason == NULL) { - exit_reason = ""; - } - - is_probe = pcmk_xe_is_probe(xml_op); -- - if (is_probe) { - task = "probe"; - } -@@ -3452,12 +3472,15 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - * those versions or processing of saved CIB files from those versions, - * so we do not need to care much about this case. - */ -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, "obsolete history format"); - crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)", - key, pe__node_name(node)); - -- } else if (target_rc != *rc) { -- *status = PCMK_EXEC_ERROR; -+ } else if (*rc == target_rc) { -+ remap_because(status, &why, PCMK_EXEC_DONE, "expected result"); -+ -+ } else { -+ remap_because(status, &why, PCMK_EXEC_ERROR, "unexpected result"); - pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", - key, pe__node_name(node), - target_rc, services_ocf_exitcode_str(target_rc), -@@ -3468,7 +3491,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - switch (*rc) { - case PCMK_OCF_OK: - if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why,PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, "Probe found %s active on %s at %s", - rsc->id, pe__node_name(node), - last_change_str(xml_op)); -@@ -3479,7 +3502,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - if (is_probe || (target_rc == *rc) - || !pcmk_is_set(rsc->flags, pe_rsc_managed)) { - -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why, PCMK_EXEC_DONE, "exit status"); - rsc->role = RSC_ROLE_STOPPED; - - /* 
clear any previous failure actions */ -@@ -3490,7 +3513,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - - case PCMK_OCF_RUNNING_PROMOTED: - if (is_probe && (*rc != target_rc)) { -- *status = PCMK_EXEC_DONE; -+ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, - "Probe found %s active and promoted on %s at %s", - rsc->id, pe__node_name(node), -@@ -3502,11 +3525,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - case PCMK_OCF_DEGRADED_PROMOTED: - case PCMK_OCF_FAILED_PROMOTED: - rsc->role = RSC_ROLE_PROMOTED; -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, "exit status"); - break; - - case PCMK_OCF_NOT_CONFIGURED: -- *status = PCMK_EXEC_ERROR_FATAL; -+ remap_because(status, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); - break; - - case PCMK_OCF_UNIMPLEMENT_FEATURE: -@@ -3517,9 +3540,11 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - - if (interval_ms == 0) { - check_recoverable(rsc, node, task, *rc, xml_op); -- *status = PCMK_EXEC_ERROR_HARD; -+ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, -+ "exit status"); - } else { -- *status = PCMK_EXEC_NOT_SUPPORTED; -+ remap_because(status, &why, PCMK_EXEC_NOT_SUPPORTED, -+ "exit status"); - } - } - break; -@@ -3528,7 +3553,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - case PCMK_OCF_INVALID_PARAM: - case PCMK_OCF_INSUFFICIENT_PRIV: - check_recoverable(rsc, node, task, *rc, xml_op); -- *status = PCMK_EXEC_ERROR_HARD; -+ remap_because(status, &why, PCMK_EXEC_ERROR_HARD, "exit status"); - break; - - default: -@@ -3537,13 +3562,21 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - "on %s at %s as failure", - *rc, task, rsc->id, pe__node_name(node), - last_change_str(xml_op)); -- *status = PCMK_EXEC_ERROR; -+ remap_because(status, &why, PCMK_EXEC_ERROR, -+ "unknown exit status"); - } - break; - } - -- pe_rsc_trace(rsc, "Remapped %s status 
to '%s'", -- key, pcmk_exec_status_str(*status)); -+remap_done: -+ if (why != NULL) { -+ pe_rsc_trace(rsc, -+ "Remapped %s result from [%s: %s] to [%s: %s] " -+ "because of %s", -+ key, pcmk_exec_status_str(orig_exec_status), -+ crm_exit_str(orig_exit_status), -+ pcmk_exec_status_str(*status), crm_exit_str(*rc), why); -+ } - } - - // return TRUE if start or monitor last failure but parameters changed -@@ -3947,9 +3980,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - parent = uber_parent(rsc); - } - -- pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", -- task_key, task, task_id, status, rc, pe__node_name(node), -- role2text(rsc->role)); -+ pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", -+ ID(xml_op), task, task_id, pe__node_name(node), -+ pcmk_exec_status_str(status), crm_exit_str(rc)); - - if (node->details->unclean) { - pe_rsc_trace(rsc, -@@ -4077,9 +4110,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - goto done; - - case PCMK_EXEC_DONE: -- pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s", -- task, rsc->id, pe__node_name(node), -- last_change_str(xml_op), ID(xml_op)); - update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); - goto done; - -@@ -4175,9 +4205,9 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - } - - done: -- pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", -- rsc->id, task, role2text(rsc->role), -- role2text(rsc->next_role)); -+ pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", -+ rsc->id, pe__node_name(node), ID(xml_op), -+ role2text(rsc->role), role2text(rsc->next_role)); - } - - static void --- -2.31.1 - -From 5a1d2a3ba58fa73225433dab40cee0a6e0ef9bda Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 12:08:55 -0600 -Subject: [PATCH 03/14] Low: scheduler: improve migration history validation - -Instead of a simple CRM_CHECK(), 
functionize parsing the source and target node -names from a migration action's resource history entry. This reduces -duplication and allows us to log more helpful errors. - -Also, CRM_CHECK() tries to dump core for debugging, and that's not helpful for -corrupted CIB entries. ---- - lib/pengine/unpack.c | 87 ++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 75 insertions(+), 12 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index b7b2873..cd1b038 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2786,6 +2786,60 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, - || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, - data_set); - } -+ -+/*! -+ * \internal -+ * \brief Parse migration source and target node names from history entry -+ * -+ * \param[in] entry Resource history entry for a migration action -+ * \param[in] source_node If not NULL, source must match this node -+ * \param[in] target_node If not NULL, target must match this node -+ * \param[out] source_name Where to store migration source node name -+ * \param[out] target_name Where to store migration target node name -+ * -+ * \return Standard Pacemaker return code -+ */ -+static int -+get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, -+ const pe_node_t *target_node, -+ const char **source_name, const char **target_name) -+{ -+ const char *id = ID(entry); -+ -+ if (id == NULL) { -+ crm_err("Ignoring resource history entry without ID"); -+ return pcmk_rc_unpack_error; -+ } -+ -+ *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); -+ *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); -+ if ((*source_name == NULL) || (*target_name == NULL)) { -+ crm_err("Ignoring resource history entry %s without " -+ XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, -+ id); -+ return pcmk_rc_unpack_error; -+ } -+ -+ if ((source_node != NULL) -+ && 
!pcmk__str_eq(*source_name, source_node->details->uname, -+ pcmk__str_casei|pcmk__str_null_matches)) { -+ crm_err("Ignoring resource history entry %s because " -+ XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", -+ id, pcmk__s(*source_name, ""), pe__node_name(source_node)); -+ return pcmk_rc_unpack_error; -+ } -+ -+ if ((target_node != NULL) -+ && !pcmk__str_eq(*target_name, target_node->details->uname, -+ pcmk__str_casei|pcmk__str_null_matches)) { -+ crm_err("Ignoring resource history entry %s because " -+ XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", -+ id, pcmk__s(*target_name, ""), pe__node_name(target_node)); -+ return pcmk_rc_unpack_error; -+ } -+ -+ return pcmk_rc_ok; -+} - - static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, -@@ -2834,13 +2888,16 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_node_t *target_node = NULL; - pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - bool source_newer_op = false; - bool target_newer_state = false; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, node, NULL, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If there's any newer non-monitor operation on the source, this migrate_to - * potentially no longer matters for the source. 
-@@ -2949,11 +3006,14 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) - { - xmlNode *target_migrate_from = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(source, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, node, NULL, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If a migration failed, we have to assume the resource is active. Clones - * are not allowed to migrate, so role can't be promoted. -@@ -3001,11 +3061,14 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, - xmlNode *xml_op, pe_working_set_t *data_set) - { - xmlNode *source_migrate_to = NULL; -- const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); -- const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); -+ const char *source = NULL; -+ const char *target = NULL; - -- // Sanity check -- CRM_CHECK(source && target && !strcmp(target, node->details->uname), return); -+ // Get source and target node names from XML -+ if (get_migration_node_names(xml_op, NULL, node, &source, -+ &target) != pcmk_rc_ok) { -+ return; -+ } - - /* If a migration failed, we have to assume the resource is active. Clones - * are not allowed to migrate, so role can't be promoted. --- -2.31.1 - -From 5139e5369769e733b05bc28940d3dccb4f7fca95 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 14:30:16 -0600 -Subject: [PATCH 04/14] Refactor: scheduler: functionize adding a dangling - migration - -... 
for code isolation and readability ---- - lib/pengine/unpack.c | 31 +++++++++++++++++++++++-------- - 1 file changed, 23 insertions(+), 8 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index cd1b038..fa7c2cc 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2841,6 +2841,28 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - return pcmk_rc_ok; - } - -+/* -+ * \internal -+ * \brief Add a migration source to a resource's list of dangling migrations -+ * -+ * If the migrate_to and migrate_from actions in a live migration both -+ * succeeded, but there is no stop on the source, the migration is considered -+ * "dangling." Add the source to the resource's dangling migration list, which -+ * will be used to schedule a stop on the source without affecting the target. -+ * -+ * \param[in,out] rsc Resource involved in migration -+ * \param[in] node Migration source -+ */ -+static void -+add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) -+{ -+ pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s", -+ rsc->id, pe__node_name(node)); -+ rsc->role = RSC_ROLE_STOPPED; -+ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, -+ (gpointer) node); -+} -+ - static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) -@@ -2941,14 +2963,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - if (migrate_from && from_rc == PCMK_OCF_OK - && (from_status == PCMK_EXEC_DONE)) { -- /* The migrate_to and migrate_from both succeeded, so mark the migration -- * as "dangling". This will be used to schedule a stop action on the -- * source without affecting the target. 
-- */ -- pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), -- source); -- rsc->role = RSC_ROLE_STOPPED; -- rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); -+ add_dangling_migration(rsc, node); - - } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed - /* If the resource has newer state on the target, this migrate_to no --- -2.31.1 - -From da71c04463d31338dd5da54d1d48b53e413716dc Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 16:57:55 -0600 -Subject: [PATCH 05/14] Refactor: scheduler: check for dangling migration - before setting role - -Previously, unpack_migrate_to_success() set rsc->role = RSC_ROLE_STARTED -then checked for dangling migration, which would reset it to RSC_ROLE_STOPPED. - -For clarity, do the dangling migration check first. ---- - lib/pengine/unpack.c | 47 ++++++++++++++++++++++++-------------------- - 1 file changed, 26 insertions(+), 21 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index fa7c2cc..b858b59 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2905,8 +2905,8 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * migration is considered to be "dangling". Schedule a stop on the source - * in this case. - */ -- int from_rc = 0; -- int from_status = 0; -+ int from_rc = PCMK_OCF_OK; -+ int from_status = PCMK_EXEC_PENDING; - pe_node_t *target_node = NULL; - pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; -@@ -2930,12 +2930,17 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - // Check whether there was a migrate_from action on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, data_set); -- -- /* Even if there's a newer non-monitor operation on the source, we still -- * need to check how this migrate_to might matter for the target. 
-- */ -- if (source_newer_op && migrate_from) { -- return; -+ if (migrate_from != NULL) { -+ if (source_newer_op) { -+ /* There's a newer non-monitor operation on the source and a -+ * migrate_from on the target, so this migrate_to is irrelevant to -+ * the resource's state. -+ */ -+ return; -+ } -+ crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); -+ crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, -+ &from_status); - } - - /* If the resource has newer state on the target after the migration -@@ -2948,24 +2953,24 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - return; - } - -- // Clones are not allowed to migrate, so role can't be promoted -+ /* Check for dangling migration (migrate_from succeeded but stop not done). -+ * We know there's no stop because we already returned if the target has a -+ * migrate_from and the source has any newer non-monitor operation. -+ */ -+ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { -+ add_dangling_migration(rsc, node); -+ return; -+ } -+ -+ /* Without newer state, this migrate_to implies the resource is active. -+ * (Clones are not allowed to migrate, so role can't be promoted.) 
-+ */ - rsc->role = RSC_ROLE_STARTED; - - target_node = pe_find_node(data_set->nodes, target); - source_node = pe_find_node(data_set->nodes, source); - -- if (migrate_from) { -- crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); -- crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); -- pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", -- ID(migrate_from), target, from_status, from_rc); -- } -- -- if (migrate_from && from_rc == PCMK_OCF_OK -- && (from_status == PCMK_EXEC_DONE)) { -- add_dangling_migration(rsc, node); -- -- } else if (migrate_from && (from_status != PCMK_EXEC_PENDING)) { // Failed -+ if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target - /* If the resource has newer state on the target, this migrate_to no - * longer matters for the target. - */ --- -2.31.1 - -From d98a2687d68747b0598554939dea05c420456a12 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:05:50 -0600 -Subject: [PATCH 06/14] Refactor: scheduler: avoid duplication of - active-on-target check - ---- - lib/pengine/unpack.c | 24 ++++++------------------ - 1 file changed, 6 insertions(+), 18 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index b858b59..8cfc0ef 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2914,6 +2914,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - const char *target = NULL; - bool source_newer_op = false; - bool target_newer_state = false; -+ bool active_on_target = false; - - // Get source and target node names from XML - if (get_migration_node_names(xml_op, node, NULL, &source, -@@ -2969,23 +2970,14 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - target_node = pe_find_node(data_set->nodes, target); - source_node = pe_find_node(data_set->nodes, source); -+ active_on_target = !target_newer_state && (target_node != NULL) -+ && target_node->details->online; - - if 
(from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target -- /* If the resource has newer state on the target, this migrate_to no -- * longer matters for the target. -- */ -- if (!target_newer_state -- && target_node && target_node->details->online) { -- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, -- target_node->details->online); -+ if (active_on_target) { - native_add_running(rsc, target_node, data_set, TRUE); -- - } else { -- /* With the earlier bail logic, migrate_from != NULL here implies -- * source_newer_op is false, meaning this migrate_to still matters -- * for the source. -- * Consider it failed here - forces a restart, prevents migration -- */ -+ // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } -@@ -2994,11 +2986,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - /* If the resource has newer state on the target, this migrate_to no - * longer matters for the target. - */ -- if (!target_newer_state -- && target_node && target_node->details->online) { -- pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node, -- target_node->details->online); -- -+ if (active_on_target) { - native_add_running(rsc, target_node, data_set, FALSE); - if (source_node && source_node->details->online) { - /* This is a partial migration: the migrate_to completed --- -2.31.1 - -From ae145309e3fdb26608e99f6d1fe1a7859d98efd0 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:07:58 -0600 -Subject: [PATCH 07/14] Refactor: scheduler: improve unpacking of successful - migrate_to - -Improve log messages, comments, and formatting, and avoid doing things until -needed, to improve efficiency of early returns. 
---- - lib/pengine/unpack.c | 109 +++++++++++++++++++------------------------ - 1 file changed, 48 insertions(+), 61 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 8cfc0ef..224b7b5 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2867,48 +2867,40 @@ static void - unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe_working_set_t *data_set) - { -- /* A successful migration sequence is: -- * migrate_to on source node -- * migrate_from on target node -- * stop on source node -+ /* A complete migration sequence is: -+ * 1. migrate_to on source node (which succeeded if we get to this function) -+ * 2. migrate_from on target node -+ * 3. stop on source node - * -- * But there could be scenarios like (It's easier to produce with cluster -- * property batch-limit=1): -- * -- * - rscA is live-migrating from node1 to node2. -- * -- * - Before migrate_to on node1 returns, put node2 into standby. -- * -- * - Transition aborts upon return of successful migrate_to on node1. New -- * transition is going to stop the rscA on both nodes and start it on -- * node1. -+ * If no migrate_from has happened, the migration is considered to be -+ * "partial". If the migrate_from succeeded but no stop has happened, the -+ * migration is considered to be "dangling". - * -- * - While it is stopping on node1, run something that is going to make -- * the transition abort again like: -- * crm_resource --resource rscA --ban --node node2 -+ * If a successful migrate_to and stop have happened on the source node, we -+ * still need to check for a partial migration, due to scenarios (easier to -+ * produce with batch-limit=1) like: - * -- * - Transition aborts upon return of stop on node1. -+ * - A resource is migrating from node1 to node2, and a migrate_to is -+ * initiated for it on node1. - * -- * Now although there's a stop on node1, it's still a partial migration and -- * rscA is still potentially active on node2. 
-+ * - node2 goes into standby mode while the migrate_to is pending, which -+ * aborts the transition. - * -- * So even if a migrate_to is followed by a stop, we still need to check -- * whether there's a corresponding migrate_from or any newer operation on -- * the target. -+ * - Upon completion of the migrate_to, a new transition schedules a stop -+ * on both nodes and a start on node1. - * -- * If no migrate_from has happened, the migration is considered to be -- * "partial". If the migrate_from failed, make sure the resource gets -- * stopped on both source and target (if up). -+ * - If the new transition is aborted for any reason while the resource is -+ * stopping on node1, the transition after that stop completes will see -+ * the migrate_from and stop on the source, but it's still a partial -+ * migration, and the resource must be stopped on node2 because it is -+ * potentially active there due to the migrate_to. - * -- * If the migrate_to and migrate_from both succeeded (which also implies the -- * resource is no longer running on the source), but there is no stop, the -- * migration is considered to be "dangling". Schedule a stop on the source -- * in this case. -+ * We also need to take into account that either node's history may be -+ * cleared at any point in the migration process. - */ - int from_rc = PCMK_OCF_OK; - int from_status = PCMK_EXEC_PENDING; - pe_node_t *target_node = NULL; -- pe_node_t *source_node = NULL; - xmlNode *migrate_from = NULL; - const char *source = NULL; - const char *target = NULL; -@@ -2922,13 +2914,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - return; - } - -- /* If there's any newer non-monitor operation on the source, this migrate_to -- * potentially no longer matters for the source. 
-- */ -+ // Check for newer state on the source - source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, - data_set); - -- // Check whether there was a migrate_from action on the target -+ // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, - source, -1, data_set); - if (migrate_from != NULL) { -@@ -2944,12 +2934,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - &from_status); - } - -- /* If the resource has newer state on the target after the migration -- * events, this migrate_to no longer matters for the target. -+ /* If the resource has newer state on both the source and target after the -+ * migration events, this migrate_to is irrelevant to the resource's state. - */ - target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, - migrate_from, data_set); -- - if (source_newer_op && target_newer_state) { - return; - } -@@ -2969,7 +2958,6 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - rsc->role = RSC_ROLE_STARTED; - - target_node = pe_find_node(data_set->nodes, target); -- source_node = pe_find_node(data_set->nodes, source); - active_on_target = !target_newer_state && (target_node != NULL) - && target_node->details->online; - -@@ -2981,31 +2969,30 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); - pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } -+ return; -+ } - -- } else { // Pending, or complete but erased -- /* If the resource has newer state on the target, this migrate_to no -- * longer matters for the target. 
-- */ -- if (active_on_target) { -- native_add_running(rsc, target_node, data_set, FALSE); -- if (source_node && source_node->details->online) { -- /* This is a partial migration: the migrate_to completed -- * successfully on the source, but the migrate_from has not -- * completed. Remember the source and target; if the newly -- * chosen target remains the same when we schedule actions -- * later, we may continue with the migration. -- */ -- rsc->partial_migration_target = target_node; -- rsc->partial_migration_source = source_node; -- } -- } else if (!source_newer_op) { -- /* This migrate_to matters for the source only if it's the last -- * non-monitor operation here. -- * Consider it failed here - forces a restart, prevents migration -+ // The migrate_from is pending, complete but erased, or to be scheduled -+ -+ if (active_on_target) { -+ pe_node_t *source_node = pe_find_node(data_set->nodes, source); -+ -+ native_add_running(rsc, target_node, data_set, FALSE); -+ if ((source_node != NULL) && source_node->details->online) { -+ /* This is a partial migration: the migrate_to completed -+ * successfully on the source, but the migrate_from has not -+ * completed. Remember the source and target; if the newly -+ * chosen target remains the same when we schedule actions -+ * later, we may continue with the migration. 
- */ -- pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -- pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); -+ rsc->partial_migration_target = target_node; -+ rsc->partial_migration_source = source_node; - } -+ -+ } else if (!source_newer_op) { -+ // Mark resource as failed, require recovery, and prevent migration -+ pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -+ pe__clear_resource_flags(rsc, pe_rsc_allow_migrate); - } - } - --- -2.31.1 - -From 7d63ed8d52f64d2523367cff36bf77bd85296bd9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 17:14:57 -0600 -Subject: [PATCH 08/14] Refactor: scheduler: drop redundant argument from - unpack_migrate_to_success() - ---- - lib/pengine/unpack.c | 19 +++++++++---------- - 1 file changed, 9 insertions(+), 10 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 224b7b5..6222115 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2864,8 +2864,7 @@ add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node) - } - - static void --unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, -- pe_working_set_t *data_set) -+unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) - { - /* A complete migration sequence is: - * 1. 
migrate_to on source node (which succeeded if we get to this function) -@@ -2916,11 +2915,11 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - - // Check for newer state on the source - source_newer_op = non_monitor_after(rsc->id, source, xml_op, true, -- data_set); -+ rsc->cluster); - - // Check for a migrate_from action from this source on the target - migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target, -- source, -1, data_set); -+ source, -1, rsc->cluster); - if (migrate_from != NULL) { - if (source_newer_op) { - /* There's a newer non-monitor operation on the source and a -@@ -2938,7 +2937,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * migration events, this migrate_to is irrelevant to the resource's state. - */ - target_newer_state = newer_state_after_migrate(rsc->id, target, xml_op, -- migrate_from, data_set); -+ migrate_from, rsc->cluster); - if (source_newer_op && target_newer_state) { - return; - } -@@ -2957,13 +2956,13 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - */ - rsc->role = RSC_ROLE_STARTED; - -- target_node = pe_find_node(data_set->nodes, target); -+ target_node = pe_find_node(rsc->cluster->nodes, target); - active_on_target = !target_newer_state && (target_node != NULL) - && target_node->details->online; - - if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target - if (active_on_target) { -- native_add_running(rsc, target_node, data_set, TRUE); -+ native_add_running(rsc, target_node, rsc->cluster, TRUE); - } else { - // Mark resource as failed, require recovery, and prevent migration - pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop); -@@ -2975,9 +2974,9 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - // The migrate_from is pending, complete but erased, or to be scheduled - - if (active_on_target) { -- pe_node_t *source_node = pe_find_node(data_set->nodes, 
source); -+ pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); - -- native_add_running(rsc, target_node, data_set, FALSE); -+ native_add_running(rsc, target_node, rsc->cluster, FALSE); - if ((source_node != NULL) && source_node->details->online) { - /* This is a partial migration: the migrate_to completed - * successfully on the source, but the migrate_from has not -@@ -3946,7 +3945,7 @@ update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, c - clear_past_failure = TRUE; - - } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { -- unpack_migrate_to_success(rsc, node, xml_op, data_set); -+ unpack_migrate_to_success(rsc, node, xml_op); - - } else if (rsc->role < RSC_ROLE_STARTED) { - pe_rsc_trace(rsc, "%s active on %s", rsc->id, pe__node_name(node)); --- -2.31.1 - -From 3be487f87bf5e26277379148922525fd98d29681 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 09:13:30 -0600 -Subject: [PATCH 09/14] Doc: scheduler: clarify comments about unpacking - migration history - -per review ---- - lib/pengine/unpack.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 6222115..ec2cf26 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2791,9 +2791,9 @@ newer_state_after_migrate(const char *rsc_id, const char *node_name, - * \internal - * \brief Parse migration source and target node names from history entry - * -- * \param[in] entry Resource history entry for a migration action -- * \param[in] source_node If not NULL, source must match this node -- * \param[in] target_node If not NULL, target must match this node -+ * \param[in] entry Resource history entry for a migration action -+ * \param[in] source_node If not NULL, source must match this node -+ * \param[in] target_node If not NULL, target must match this node - * \param[out] source_name Where to store migration source node name - * 
\param[out] target_name Where to store migration target node name - * -@@ -2825,7 +2825,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - pcmk__str_casei|pcmk__str_null_matches)) { - crm_err("Ignoring resource history entry %s because " - XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", -- id, pcmk__s(*source_name, ""), pe__node_name(source_node)); -+ id, *source_name, pe__node_name(source_node)); - return pcmk_rc_unpack_error; - } - -@@ -2834,7 +2834,7 @@ get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node, - pcmk__str_casei|pcmk__str_null_matches)) { - crm_err("Ignoring resource history entry %s because " - XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", -- id, pcmk__s(*target_name, ""), pe__node_name(target_node)); -+ id, *target_name, pe__node_name(target_node)); - return pcmk_rc_unpack_error; - } - -@@ -2890,7 +2890,7 @@ unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op) - * - * - If the new transition is aborted for any reason while the resource is - * stopping on node1, the transition after that stop completes will see -- * the migrate_from and stop on the source, but it's still a partial -+ * the migrate_to and stop on the source, but it's still a partial - * migration, and the resource must be stopped on node2 because it is - * potentially active there due to the migrate_to. 
- * -@@ -3425,9 +3425,9 @@ check_recoverable(pe_resource_t *rsc, pe_node_t *node, const char *task, - * \brief Update an integer value and why - * - * \param[in,out] i Pointer to integer to update -- * \param[in,out] why Where to store reason for update -+ * \param[out] why Where to store reason for update - * \param[in] value New value -- * \param[in,out] reason Description of why value was changed -+ * \param[in] reason Description of why value was changed - */ - static inline void - remap_because(int *i, const char **why, int value, const char *reason) -@@ -3456,7 +3456,7 @@ remap_because(int *i, const char **why, int value, const char *reason) - * \param[in] data_set Current cluster working set - * \param[in,out] on_fail What should be done about the result - * \param[in] target_rc Expected return code of operation -- * \param[in,out] rc Actual return code of operation -+ * \param[in,out] rc Actual return code of operation (treated as OCF) - * \param[in,out] status Operation execution status - * - * \note If the result is remapped and the node is not shutting down or failed, -@@ -3548,7 +3548,7 @@ remap_operation(xmlNode *xml_op, pe_resource_t *rsc, pe_node_t *node, - switch (*rc) { - case PCMK_OCF_OK: - if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) { -- remap_because(status, &why,PCMK_EXEC_DONE, "probe"); -+ remap_because(status, &why, PCMK_EXEC_DONE, "probe"); - pe_rsc_info(rsc, "Probe found %s active on %s at %s", - rsc->id, pe__node_name(node), - last_change_str(xml_op)); --- -2.31.1 - -From 3ef6c84a7b0dd434731e72d91f2724bdb52e292e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 09:42:01 -0600 -Subject: [PATCH 10/14] Refactor: scheduler: improve xpath efficiency when - unpacking - -Using "//" means that every child must be searched recursively. If we know the -exact path, we should explicitly specify it. 
---- - lib/pengine/unpack.c | 20 ++++++++++++-------- - 1 file changed, 12 insertions(+), 8 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index ec2cf26..8aead58 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2571,6 +2571,13 @@ set_node_score(gpointer key, gpointer value, gpointer user_data) - node->weight = *score; - } - -+#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ -+ "/" XML_CIB_TAG_STATE -+#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ -+ "/" XML_LRM_TAG_RESOURCES \ -+ "/" XML_LRM_TAG_RESOURCE -+#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP -+ - static xmlNode * - find_lrm_op(const char *resource, const char *op, const char *node, const char *source, - int target_rc, pe_working_set_t *data_set) -@@ -2583,10 +2590,9 @@ find_lrm_op(const char *resource, const char *op, const char *node, const char * - - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, -- "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='", node, "']" -- "//" XML_LRM_TAG_RESOURCE -- "[@" XML_ATTR_ID "='", resource, "']" -- "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" -+ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", - NULL); - - /* Need to check against transition_magic too? 
*/ -@@ -2631,10 +2637,8 @@ find_lrm_resource(const char *rsc_id, const char *node_name, - - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, -- "//" XML_CIB_TAG_STATE -- "[@" XML_ATTR_UNAME "='", node_name, "']" -- "//" XML_LRM_TAG_RESOURCE -- "[@" XML_ATTR_ID "='", rsc_id, "']", -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", - NULL); - - xml = get_xpath_object((const char *) xpath->str, data_set->input, --- -2.31.1 - -From 1869f99bc8eeedb976f96f0f1cc3d4dd86735504 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 10:25:53 -0600 -Subject: [PATCH 11/14] Low: scheduler: unknown_on_node() should ignore pending - actions - -Previously, unknown_on_node() looked for any lrm_rsc_op at all to decide -whether a resource is known on a node. However if the only action is pending, -the resource is not yet known. - -Also drop a redundant argument and add a doxygen block. (The rsc argument is -not const due to a getDocPtr() call in the chain, as well as libxml2 calls that -are likely const in practice but aren't marked as such.) ---- - lib/pengine/unpack.c | 37 +++++++++++++++++++++++++------------ - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 8aead58..14dc202 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2648,19 +2648,32 @@ find_lrm_resource(const char *rsc_id, const char *node_name, - return xml; - } - -+/*! 
-+ * \internal -+ * \brief Check whether a resource has no completed action history on a node -+ * -+ * \param[in,out] rsc Resource to check -+ * \param[in] node_name Node to check -+ * -+ * \return true if \p rsc_id is unknown on \p node_name, otherwise false -+ */ - static bool --unknown_on_node(const char *rsc_id, const char *node_name, -- pe_working_set_t *data_set) -+unknown_on_node(pe_resource_t *rsc, const char *node_name) - { -- xmlNode *lrm_resource = NULL; -- -- lrm_resource = find_lrm_resource(rsc_id, node_name, data_set); -+ bool result = false; -+ xmlXPathObjectPtr search; -+ GString *xpath = g_string_sized_new(256); - -- /* If the resource has no lrm_rsc_op history on the node, that means its -- * state is unknown there. -- */ -- return (lrm_resource == NULL -- || first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP) == NULL); -+ pcmk__g_strcat(xpath, -+ XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" -+ SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" -+ SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", -+ NULL); -+ search = xpath_search(rsc->cluster->input, (const char *) xpath->str); -+ result = (numXpathResults(search) == 0); -+ freeXpathObject(search); -+ g_string_free(xpath, TRUE); -+ return result; - } - - /*! -@@ -3027,7 +3040,7 @@ unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op, - * Don't just consider it running there. We will get back here anyway in - * case the probe detects it's running there. - */ -- !unknown_on_node(rsc->id, target, data_set) -+ !unknown_on_node(rsc, target) - /* If the resource has newer state on the target after the migration - * events, this migrate_to no longer matters for the target. - */ -@@ -3082,7 +3095,7 @@ unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node, - * Don't just consider it running there. We will get back here anyway in - * case the probe detects it's running there. 
- */ -- !unknown_on_node(rsc->id, source, data_set) -+ !unknown_on_node(rsc, source) - /* If the resource has newer state on the source after the migration - * events, this migrate_from no longer matters for the source. - */ --- -2.31.1 - -From 22fbab8e0d449d2accb231dfcec94294ded27f4e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 31 Jan 2023 12:11:19 -0600 -Subject: [PATCH 12/14] Test: scheduler: add regression test for migration - intermediary - -As of this commit, the cluster wrongly restarts the migrated resource ---- - cts/cts-scheduler.in | 3 + - .../dot/migration-intermediary-cleaned.dot | 46 ++ - .../exp/migration-intermediary-cleaned.exp | 316 +++++++++++ - .../migration-intermediary-cleaned.scores | 201 +++++++ - .../migration-intermediary-cleaned.summary | 94 ++++ - .../xml/migration-intermediary-cleaned.xml | 513 ++++++++++++++++++ - 6 files changed, 1173 insertions(+) - create mode 100644 cts/scheduler/dot/migration-intermediary-cleaned.dot - create mode 100644 cts/scheduler/exp/migration-intermediary-cleaned.exp - create mode 100644 cts/scheduler/scores/migration-intermediary-cleaned.scores - create mode 100644 cts/scheduler/summary/migration-intermediary-cleaned.summary - create mode 100644 cts/scheduler/xml/migration-intermediary-cleaned.xml - -diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in -index feb5dc8..9899c36 100644 ---- a/cts/cts-scheduler.in -+++ b/cts/cts-scheduler.in -@@ -387,6 +387,9 @@ TESTS = [ - [ "probe-target-of-failed-migrate_to-1", "Failed migrate_to, target rejoins" ], - [ "probe-target-of-failed-migrate_to-2", "Failed migrate_to, target rejoined and probed" ], - [ "partial-live-migration-multiple-active", "Prevent running on multiple nodes due to partial live migration" ], -+ [ "migration-intermediary-cleaned", -+ "Probe live-migration intermediary with no history" -+ ], - [ "bug-lf-2422", "Dependency on partially active group - stop ocfs:*" ], - ], - [ -diff --git 
a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot -new file mode 100644 -index 0000000..09568d0 ---- /dev/null -+++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot -@@ -0,0 +1,46 @@ -+ digraph "g" { -+"Connectivity_running_0" [ style=bold color="green" fontcolor="orange"] -+"Connectivity_start_0" -> "Connectivity_running_0" [ style = bold] -+"Connectivity_start_0" -> "ping-1_start_0 rhel8-2" [ style = bold] -+"Connectivity_start_0" [ style=bold color="green" fontcolor="orange"] -+"FencingFail_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] -+"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] -+"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] -+"petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] -+"ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_monitor_60000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"ping-1_start_0 rhel8-2" -> "Connectivity_running_0" [ style = bold] -+"ping-1_start_0 rhel8-2" -> "ping-1_monitor_60000 rhel8-2" [ style = 
bold] -+"ping-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"r192.168.122.207_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"r192.168.122.208_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_monitor_0 rhel8-2" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_start_0 rhel8-2" -> "rsc_rhel8-1_monitor_5000 rhel8-2" [ style = bold] -+"rsc_rhel8-1_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-1_stop_0 rhel8-3" -> "rsc_rhel8-1_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-1_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_monitor_0 rhel8-2" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-2_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_monitor_5000 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_start_0 rhel8-2" -> "rsc_rhel8-2_monitor_5000 rhel8-2" [ style = bold] -+"rsc_rhel8-2_start_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-2_stop_0 rhel8-4" -> "rsc_rhel8-2_start_0 rhel8-2" [ style = bold] -+"rsc_rhel8-2_stop_0 rhel8-4" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-3_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-4_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"rsc_rhel8-5_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+"stateful-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -+} -diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp -new file mode 100644 -index 0000000..28fa776 ---- /dev/null -+++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp -@@ -0,0 +1,316 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores -new file mode 100644 -index 0000000..b3b8dff ---- /dev/null -+++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores -@@ -0,0 +1,201 @@ -+ -+pcmk__clone_allocate: Connectivity allocation score on rhel8-1: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-2: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-3: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-4: 0 -+pcmk__clone_allocate: Connectivity allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-3: 1 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:0 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-2: 0 
-+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-4: 1 -+pcmk__clone_allocate: ping-1:1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:2 allocation score on rhel8-5: 1 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:3 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-1: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-2: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: ping-1:4 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: promotable-1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-3: 11 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:0 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:1 allocation score on 
rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-4: 6 -+pcmk__clone_allocate: stateful-1:1 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:2 allocation score on rhel8-5: 6 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-3: 0 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-4: 0 -+pcmk__clone_allocate: stateful-1:3 allocation score on rhel8-5: 0 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-3: 10 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-4: 5 -+pcmk__clone_allocate: stateful-1:4 allocation score on rhel8-5: 5 -+pcmk__group_assign: group-1 allocation score on rhel8-1: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-2: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-3: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-4: 0 -+pcmk__group_assign: group-1 allocation score on rhel8-5: 0 -+pcmk__group_assign: petulant allocation score on rhel8-1: 0 -+pcmk__group_assign: petulant allocation score on rhel8-2: 0 -+pcmk__group_assign: petulant allocation score on rhel8-3: 0 -+pcmk__group_assign: petulant allocation score on rhel8-4: 0 -+pcmk__group_assign: petulant allocation score on rhel8-5: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-1: 0 -+pcmk__group_assign: 
r192.168.122.207 allocation score on rhel8-2: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-3: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-4: 0 -+pcmk__group_assign: r192.168.122.207 allocation score on rhel8-5: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-1: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-2: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-3: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-4: 0 -+pcmk__group_assign: r192.168.122.208 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-1: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-2: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-3: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-4: 0 -+pcmk__primitive_assign: Fencing allocation score on rhel8-5: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-1: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-2: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-3: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-4: 0 -+pcmk__primitive_assign: FencingFail allocation score on rhel8-5: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-1: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-2: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-3: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-4: 0 -+pcmk__primitive_assign: FencingPass allocation score on rhel8-5: 0 -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-3: 0 -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: lsb-dummy allocation score on rhel8-5: -INFINITY 
-+pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-5: 0 -+pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 -+pcmk__primitive_assign: petulant allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: petulant allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-3: 1 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: ping-1:0 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-4: 1 -+pcmk__primitive_assign: ping-1:1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:2 allocation score on rhel8-5: 1 -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on 
rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:3 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: ping-1:4 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-3: 11 -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: r192.168.122.207 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: r192.168.122.208 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-1: 100 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-2: 100 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-2 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-3 
allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-3: 100 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-3 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-4: 100 -+pcmk__primitive_assign: rsc_rhel8-4 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-1: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-2: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-3: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: rsc_rhel8-5 allocation score on rhel8-5: 100 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-3: 11 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-4: 0 -+pcmk__primitive_assign: stateful-1:0 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-4: 6 -+pcmk__primitive_assign: stateful-1:1 allocation score on rhel8-5: 0 -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-3: -INFINITY 
-+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:2 allocation score on rhel8-5: 6 -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:3 allocation score on rhel8-5: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-1: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-2: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-3: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-4: -INFINITY -+pcmk__primitive_assign: stateful-1:4 allocation score on rhel8-5: -INFINITY -+stateful-1:0 promotion score on rhel8-3: 10 -+stateful-1:1 promotion score on rhel8-4: 5 -+stateful-1:2 promotion score on rhel8-5: 5 -+stateful-1:3 promotion score on none: 0 -+stateful-1:4 promotion score on none: 0 -diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary -new file mode 100644 -index 0000000..5de1355 ---- /dev/null -+++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary -@@ -0,0 +1,94 @@ -+Using the original execution date of: 2023-01-19 21:05:59Z -+Current cluster status: -+ * Node List: -+ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * OFFLINE: [ rhel8-1 ] -+ -+ * Full List of Resources: -+ * Fencing (stonith:fence_xvm): Started rhel8-3 -+ * FencingPass (stonith:fence_dummy): Started rhel8-4 -+ * FencingFail (stonith:fence_dummy): Started rhel8-5 -+ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-4 
(ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -+ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] -+ * Clone Set: Connectivity [ping-1]: -+ * Started: [ rhel8-3 rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Clone Set: promotable-1 [stateful-1] (promotable): -+ * Promoted: [ rhel8-3 ] -+ * Unpromoted: [ rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Resource Group: group-1: -+ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 -+ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 -+ -+Transition Summary: -+ * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) -+ * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) -+ * Restart migrator ( rhel8-5 ) -+ * Start ping-1:3 ( rhel8-2 ) -+ -+Executing Cluster Transition: -+ * Resource action: Fencing monitor on rhel8-2 -+ * Resource action: FencingPass monitor on rhel8-2 -+ * Resource action: FencingFail monitor on rhel8-2 -+ * Resource action: rsc_rhel8-1 stop on rhel8-3 -+ * Resource action: rsc_rhel8-1 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-2 stop on rhel8-4 -+ * Resource action: rsc_rhel8-2 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-3 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-4 monitor on rhel8-2 -+ * Resource action: rsc_rhel8-5 monitor on rhel8-2 -+ * Resource action: migrator stop on rhel8-2 -+ * Resource action: migrator stop on rhel8-5 -+ * Resource action: migrator monitor on rhel8-2 -+ * Resource action: ping-1 monitor on rhel8-2 -+ * Pseudo action: Connectivity_start_0 -+ * Resource action: stateful-1 monitor on rhel8-2 -+ * Resource action: r192.168.122.207 monitor on rhel8-2 -+ * Resource action: petulant monitor on rhel8-2 -+ * Resource action: r192.168.122.208 monitor on rhel8-2 -+ * Resource action: lsb-dummy monitor on rhel8-2 -+ * Resource action: rsc_rhel8-1 start on rhel8-2 -+ * 
Resource action: rsc_rhel8-2 start on rhel8-2 -+ * Resource action: migrator start on rhel8-5 -+ * Resource action: migrator monitor=10000 on rhel8-5 -+ * Resource action: ping-1 start on rhel8-2 -+ * Pseudo action: Connectivity_running_0 -+ * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 -+ * Resource action: rsc_rhel8-2 monitor=5000 on rhel8-2 -+ * Resource action: ping-1 monitor=60000 on rhel8-2 -+Using the original execution date of: 2023-01-19 21:05:59Z -+ -+Revised Cluster Status: -+ * Node List: -+ * Online: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * OFFLINE: [ rhel8-1 ] -+ -+ * Full List of Resources: -+ * Fencing (stonith:fence_xvm): Started rhel8-3 -+ * FencingPass (stonith:fence_dummy): Started rhel8-4 -+ * FencingFail (stonith:fence_dummy): Started rhel8-5 -+ * rsc_rhel8-1 (ocf:heartbeat:IPaddr2): Started rhel8-2 -+ * rsc_rhel8-2 (ocf:heartbeat:IPaddr2): Started rhel8-2 -+ * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 -+ * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -+ * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] -+ * Clone Set: Connectivity [ping-1]: -+ * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 ] -+ * Clone Set: promotable-1 [stateful-1] (promotable): -+ * Promoted: [ rhel8-3 ] -+ * Unpromoted: [ rhel8-4 rhel8-5 ] -+ * Stopped: [ rhel8-1 rhel8-2 ] -+ * Resource Group: group-1: -+ * r192.168.122.207 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * petulant (service:pacemaker-cts-dummyd@10): Started rhel8-3 -+ * r192.168.122.208 (ocf:heartbeat:IPaddr2): Started rhel8-3 -+ * lsb-dummy (lsb:LSBDummy): Started rhel8-3 -diff --git a/cts/scheduler/xml/migration-intermediary-cleaned.xml b/cts/scheduler/xml/migration-intermediary-cleaned.xml -new file mode 100644 -index 0000000..bec7888 ---- /dev/null -+++ b/cts/scheduler/xml/migration-intermediary-cleaned.xml -@@ -0,0 +1,513 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ 
-+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -2.31.1 - -From 1f9fadbb06baded3fc393cfe30a0cb620aca0829 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 1 Feb 2023 17:12:13 -0600 -Subject: [PATCH 13/14] Fix: scheduler: handle cleaned migrate_from history - correctly - -Fixes T623 ---- - lib/pengine/unpack.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c -index 14dc202..9c99183 100644 ---- a/lib/pengine/unpack.c -+++ b/lib/pengine/unpack.c -@@ -2990,6 +2990,15 @@ unpack_migrate_to_success(pe_resource_t 
*rsc, pe_node_t *node, xmlNode *xml_op) - - // The migrate_from is pending, complete but erased, or to be scheduled - -+ /* If there is no history at all for the resource on an online target, then -+ * it was likely cleaned. Just return, and we'll schedule a probe. Once we -+ * have the probe result, it will be reflected in target_newer_state. -+ */ -+ if ((target_node != NULL) && target_node->details->online -+ && unknown_on_node(rsc, target)) { -+ return; -+ } -+ - if (active_on_target) { - pe_node_t *source_node = pe_find_node(rsc->cluster->nodes, source); - --- -2.31.1 - -From d9d1bf19e8522ea29c87f0c39b05828947bc5b0f Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 2 Feb 2023 15:48:01 -0600 -Subject: [PATCH 14/14] Test: scheduler: update expected output for migration - fix - ---- - .../dot/migration-intermediary-cleaned.dot | 8 -- - .../exp/migration-intermediary-cleaned.exp | 88 ++++--------------- - .../migration-intermediary-cleaned.scores | 2 +- - .../migration-intermediary-cleaned.summary | 9 +- - 4 files changed, 22 insertions(+), 85 deletions(-) - -diff --git a/cts/scheduler/dot/migration-intermediary-cleaned.dot b/cts/scheduler/dot/migration-intermediary-cleaned.dot -index 09568d0..f6eabba 100644 ---- a/cts/scheduler/dot/migration-intermediary-cleaned.dot -+++ b/cts/scheduler/dot/migration-intermediary-cleaned.dot -@@ -7,15 +7,7 @@ - "FencingPass_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "Fencing_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "lsb-dummy_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_monitor_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] - "migrator_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_monitor_10000 rhel8-5" [ style=bold color="green" fontcolor="black"] --"migrator_start_0 rhel8-5" -> "migrator_monitor_10000 rhel8-5" [ style = bold] --"migrator_start_0 rhel8-5" [ style=bold color="green" fontcolor="black"] 
--"migrator_stop_0 rhel8-2" -> "migrator_start_0 rhel8-5" [ style = bold] --"migrator_stop_0 rhel8-2" [ style=bold color="green" fontcolor="black"] --"migrator_stop_0 rhel8-5" -> "migrator_start_0 rhel8-5" [ style = bold] --"migrator_stop_0 rhel8-5" [ style=bold color="green" fontcolor="black"] - "petulant_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] - "ping-1_monitor_0 rhel8-2" -> "Connectivity_start_0" [ style = bold] - "ping-1_monitor_0 rhel8-2" [ style=bold color="green" fontcolor="black"] -diff --git a/cts/scheduler/exp/migration-intermediary-cleaned.exp b/cts/scheduler/exp/migration-intermediary-cleaned.exp -index 28fa776..8b9bb39 100644 ---- a/cts/scheduler/exp/migration-intermediary-cleaned.exp -+++ b/cts/scheduler/exp/migration-intermediary-cleaned.exp -@@ -148,91 +148,41 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - - - -- -+ - - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -+ - -- -+ - - - - - - -- -+ - - - -- -+ - -- -+ - - - - - - -- -+ - - - -- -+ - - - -@@ -241,24 +191,24 @@ - - - -- -+ - -- -+ - - - - - -- -+ - - -- -+ - - - -- -+ - -- -+ - - - -@@ -268,7 +218,7 @@ - - - -- -+ - - - -@@ -277,7 +227,7 @@ - - - -- -+ - - - -@@ -286,7 +236,7 @@ - - - -- -+ - - - -@@ -295,7 +245,7 @@ - - - -- -+ - - - -@@ -304,7 +254,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/scores/migration-intermediary-cleaned.scores b/cts/scheduler/scores/migration-intermediary-cleaned.scores -index b3b8dff..09f05d1 100644 ---- a/cts/scheduler/scores/migration-intermediary-cleaned.scores -+++ b/cts/scheduler/scores/migration-intermediary-cleaned.scores -@@ -103,7 +103,7 @@ pcmk__primitive_assign: migrator allocation score on rhel8-1: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-2: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-3: 0 - pcmk__primitive_assign: migrator allocation score on rhel8-4: 0 --pcmk__primitive_assign: migrator allocation 
score on rhel8-5: 0 -+pcmk__primitive_assign: migrator allocation score on rhel8-5: 1 - pcmk__primitive_assign: petulant allocation score on rhel8-1: -INFINITY - pcmk__primitive_assign: petulant allocation score on rhel8-2: -INFINITY - pcmk__primitive_assign: petulant allocation score on rhel8-3: 0 -diff --git a/cts/scheduler/summary/migration-intermediary-cleaned.summary b/cts/scheduler/summary/migration-intermediary-cleaned.summary -index 5de1355..dd127a8 100644 ---- a/cts/scheduler/summary/migration-intermediary-cleaned.summary -+++ b/cts/scheduler/summary/migration-intermediary-cleaned.summary -@@ -13,7 +13,7 @@ Current cluster status: - * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 - * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 - * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-5 rhel8-2 ] -+ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 - * Clone Set: Connectivity [ping-1]: - * Started: [ rhel8-3 rhel8-4 rhel8-5 ] - * Stopped: [ rhel8-1 rhel8-2 ] -@@ -30,7 +30,6 @@ Current cluster status: - Transition Summary: - * Move rsc_rhel8-1 ( rhel8-3 -> rhel8-2 ) - * Move rsc_rhel8-2 ( rhel8-4 -> rhel8-2 ) -- * Restart migrator ( rhel8-5 ) - * Start ping-1:3 ( rhel8-2 ) - - Executing Cluster Transition: -@@ -44,8 +43,6 @@ Executing Cluster Transition: - * Resource action: rsc_rhel8-3 monitor on rhel8-2 - * Resource action: rsc_rhel8-4 monitor on rhel8-2 - * Resource action: rsc_rhel8-5 monitor on rhel8-2 -- * Resource action: migrator stop on rhel8-2 -- * Resource action: migrator stop on rhel8-5 - * Resource action: migrator monitor on rhel8-2 - * Resource action: ping-1 monitor on rhel8-2 - * Pseudo action: Connectivity_start_0 -@@ -56,8 +53,6 @@ Executing Cluster Transition: - * Resource action: lsb-dummy monitor on rhel8-2 - * Resource action: rsc_rhel8-1 start on rhel8-2 - * Resource action: rsc_rhel8-2 start on rhel8-2 -- * Resource action: migrator start on rhel8-5 -- * Resource 
action: migrator monitor=10000 on rhel8-5 - * Resource action: ping-1 start on rhel8-2 - * Pseudo action: Connectivity_running_0 - * Resource action: rsc_rhel8-1 monitor=5000 on rhel8-2 -@@ -79,7 +74,7 @@ Revised Cluster Status: - * rsc_rhel8-3 (ocf:heartbeat:IPaddr2): Started rhel8-3 - * rsc_rhel8-4 (ocf:heartbeat:IPaddr2): Started rhel8-4 - * rsc_rhel8-5 (ocf:heartbeat:IPaddr2): Started rhel8-5 -- * migrator (ocf:pacemaker:Dummy): Started [ rhel8-2 rhel8-5 ] -+ * migrator (ocf:pacemaker:Dummy): Started rhel8-5 - * Clone Set: Connectivity [ping-1]: - * Started: [ rhel8-2 rhel8-3 rhel8-4 rhel8-5 ] - * Stopped: [ rhel8-1 ] --- -2.31.1 - diff --git a/SOURCES/004-clone-rsc-display.patch b/SOURCES/004-clone-rsc-display.patch new file mode 100644 index 0000000..b09a53a --- /dev/null +++ b/SOURCES/004-clone-rsc-display.patch @@ -0,0 +1,35 @@ +From 770d417e28dc9527fec8b8a00caaba8825995454 Mon Sep 17 00:00:00 2001 +From: Grace Chin +Date: Wed, 19 Jul 2023 10:25:55 -0400 +Subject: [PATCH] Fix: tools: Fix a bug in clone resource description display + +Previously, descriptions of resources running on multiple +nodes were displayed despite --full not being used (with pcs +status) or --show-detail not being used (with crm_mon). + +For example, clone resources running on multiple nodes were +affected. + +Now, --full and --show-detail must be used in order for resource +descriptions to be displayed, regardless of the number of nodes +the resource is run on. 
+ +see bz: 2106642 +--- + lib/pengine/pe_output.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c +index e0b43d997a..d1c9f6e226 100644 +--- a/lib/pengine/pe_output.c ++++ b/lib/pengine/pe_output.c +@@ -20,8 +20,7 @@ pe__resource_description(const pe_resource_t *rsc, uint32_t show_opts) + { + const char * desc = NULL; + // User-supplied description +- if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description) +- || pcmk__list_of_multiple(rsc->running_on)) { ++ if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)) { + desc = crm_element_value(rsc->xml, XML_ATTR_DESC); + } + return desc; diff --git a/SOURCES/004-g_source_remove.patch b/SOURCES/004-g_source_remove.patch deleted file mode 100644 index 2af0f47..0000000 --- a/SOURCES/004-g_source_remove.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Fri, 3 Feb 2023 12:08:57 -0800 -Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one - -QE observed a rarely reproducible core dump in the fencer during -Pacemaker shutdown, in which we try to g_source_remove() an op timer -that's already been removed. - -free_stonith_remote_op_list() --> g_hash_table_destroy() --> g_hash_table_remove_all_nodes() --> clear_remote_op_timers() --> g_source_remove() --> crm_glib_handler() --> "Source ID 190 was not found when attempting to remove it" - -The likely cause is that request_peer_fencing() doesn't set -op->op_timer_one to 0 after calling g_source_remove() on it, so if that -op is still in the stonith_remote_op_list at shutdown with the same -timer, clear_remote_op_timers() tries to remove the source for -op_timer_one again. - -There are only five locations that call g_source_remove() on a -remote_fencing_op_t timer. 
-* Three of them are in clear_remote_op_timers(), which first 0-checks - the timer and then sets it to 0 after g_source_remove(). -* One is in remote_op_query_timeout(), which does the same. -* The last is the one we fix here in request_peer_fencing(). - -I don't know all the conditions of QE's test scenario at this point. -What I do know: -* have-watchdog=true -* stonith-watchdog-timeout=10 -* no explicit topology -* fence agent script is missing for the configured fence device -* requested fencing of one node -* cluster shutdown - -Fixes RHBZ2166967 - -Signed-off-by: Reid Wahl ---- - daemons/fenced/fenced_remote.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index d61b5bd..b7426ff 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - op->state = st_exec; - if (op->op_timer_one) { - g_source_remove(op->op_timer_one); -+ op->op_timer_one = 0; - } - - if (!((stonith_watchdog_timeout_ms > 0) --- -2.31.1 - -From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 -From: Reid Wahl -Date: Wed, 15 Feb 2023 22:30:27 -0800 -Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total - -remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove -the source from the main loop after returning. Currently this function -is used as the callback only when creating op->op_timer_total. - -If we don't set op->op_timer_total to 0 before returning from -remote_op_timeout(), then we can get an assertion and core dump from -GLib when the op's timers are being cleared (either during op -finalization or during fencer shutdown). This is because -clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to -remove the source, but the source has already been removed. 
- -Note that we're already (correctly) zeroing op->op_timer_one and -op->query_timeout as appropriate in their respective callback functions. - -Fortunately, GLib doesn't care whether the source has already been -removed before we return G_SOURCE_REMOVE from a callback. So it's safe -to call finalize_op() (which removes all the op's timer sources) from -within a callback. - -Fixes RHBZ#2166967 - -Signed-off-by: Reid Wahl ---- - daemons/fenced/fenced_remote.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index b7426ff88..adea3d7d8 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) - { - remote_fencing_op_t *op = userdata; - -+ op->op_timer_total = 0; -+ - if (op->state == st_done) { - crm_debug("Action '%s' targeting %s for client %s already completed " - CRM_XS " id=%.8s", --- -2.39.0 diff --git a/SOURCES/005-attrd-dampen.patch b/SOURCES/005-attrd-dampen.patch new file mode 100644 index 0000000..80c8a67 --- /dev/null +++ b/SOURCES/005-attrd-dampen.patch @@ -0,0 +1,26 @@ +From ebac530c815a62f7c3a1c24f64e9a530d9753dbe Mon Sep 17 00:00:00 2001 +From: Hideo Yamauchi +Date: Wed, 19 Jul 2023 18:21:07 +0900 +Subject: [PATCH] High: tools: The dampen parameter is disabled when setting + values with attrd_updater. 
+ +--- + tools/attrd_updater.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index b615a3575..4688b9ff6 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -501,7 +501,7 @@ send_attrd_update(char command, const char *attr_node, const char *attr_name, + + case 'U': + rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value, +- NULL, attr_set, NULL, ++ attr_dampen, attr_set, NULL, + attr_options | pcmk__node_attr_value); + break; + +-- +2.41.0 + diff --git a/SOURCES/005-query-null.patch b/SOURCES/005-query-null.patch deleted file mode 100644 index 194cd33..0000000 --- a/SOURCES/005-query-null.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 14 Feb 2023 14:00:37 -0500 -Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in - pcmk__attrd_api_query. - -According to the header file, if node is NULL, pcmk__attrd_api_query -should query the value of the given attribute on all cluster nodes. -This is also what the server expects and how attrd_updater is supposed -to work. - -However, pcmk__attrd_api_query has no way of letting callers decide -whether they want to query all nodes or whether they want to use the -local node. We were passing NULL for the node name, which it took to -mean it should look up the local node name. This calls -pcmk__node_attr_target, which probes the local cluster name and returns -that to pcmk__attrd_api_query. If it returns non-NULL, that value will -then be put into the XML IPC call which means the server will only -return the value for that node. - -In testing this was usually fine. However, in pratice, the methods -pcmk__node_attr_target uses to figure out the local cluster node name -involves checking the OCF_RESKEY_CRM_meta_on_node environment variable -among others. - -This variable was never set in testing, but can be set in the real -world. 
This leads to circumstances where the user did "attrd_updater -QA" -expecting to get the values on all nodes, but instead only got the value -on the local cluster node. - -In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never -called if the node was NULL but was called otherwise. - -The fix is to modify pcmk__attrd_api_query to take an option for -querying all nodes. If that's present, we'll query all nodes. If it's -not present, we'll look at the given node name - NULL means look it up, -anything else means just that node. - -Regression in 2.1.5 introduced by eb20a65577 ---- - include/crm/common/attrd_internal.h | 6 +++++- - include/crm/common/ipc_attrd_internal.h | 7 +++++-- - lib/common/ipc_attrd.c | 12 ++++++++---- - tools/attrd_updater.c | 5 +++-- - 4 files changed, 21 insertions(+), 9 deletions(-) - -diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h -index 389be48..7337c38 100644 ---- a/include/crm/common/attrd_internal.h -+++ b/include/crm/common/attrd_internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2004-2022 the Pacemaker project contributors -+ * Copyright 2004-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { - pcmk__node_attr_perm = (1 << 5), - pcmk__node_attr_sync_local = (1 << 6), - pcmk__node_attr_sync_cluster = (1 << 7), -+ // pcmk__node_attr_utilization is 8, but that has not been backported. -+ // I'm leaving the gap here in case we backport that in the future and -+ // also to avoid problems on mixed-version clusters. 
-+ pcmk__node_attr_query_all = (1 << 9), - }; - - #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ -diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h -index 2c6713f..b1b7584 100644 ---- a/include/crm/common/ipc_attrd_internal.h -+++ b/include/crm/common/ipc_attrd_internal.h -@@ -1,5 +1,5 @@ - /* -- * Copyright 2022 the Pacemaker project contributors -+ * Copyright 2022-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); - * - * \param[in,out] api Connection to pacemaker-attrd - * \param[in] node Look up the attribute for this node -- * (or NULL for all nodes) -+ * (or NULL for the local node) - * \param[in] name Attribute name - * \param[in] options Bitmask of pcmk__node_attr_opts - * -+ * \note Passing pcmk__node_attr_query_all will cause the function to query -+ * the value of \p name on all nodes, regardless of the value of \p node. -+ * - * \return Standard Pacemaker return code - */ - int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, -diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c -index 4606509..dece49b 100644 ---- a/lib/common/ipc_attrd.c -+++ b/lib/common/ipc_attrd.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2011-2022 the Pacemaker project contributors -+ * Copyright 2011-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, - return EINVAL; - } - -- target = pcmk__node_attr_target(node); -+ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { -+ node = NULL; -+ } else { -+ target = pcmk__node_attr_target(node); - -- if (target != NULL) { -- node = target; -+ if (target != NULL) { -+ node = target; -+ } - } - - request = create_attrd_op(NULL); -diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c -index 3cd766d..cbd341d 100644 ---- a/tools/attrd_updater.c -+++ b/tools/attrd_updater.c -@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, - static int - send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) - { -+ uint32_t options = pcmk__node_attr_none; - pcmk_ipc_api_t *attrd_api = NULL; - int rc = pcmk_rc_ok; - -@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no - - /* Decide which node(s) to query */ - if (query_all == TRUE) { -- attr_node = NULL; -+ options |= pcmk__node_attr_query_all; - } - -- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); -+ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); - - if (rc != pcmk_rc_ok) { - g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", --- -2.31.1 - diff --git a/SOURCES/006-controller-reply.patch b/SOURCES/006-controller-reply.patch new file mode 100644 index 0000000..efd4f9c --- /dev/null +++ b/SOURCES/006-controller-reply.patch @@ -0,0 +1,109 @@ +From 3e31da0016795397bfeacb2f3d76ecfe35cc1f67 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 17 Jul 2023 14:52:42 -0500 +Subject: [PATCH] Fix: libcrmcommon: wait for reply from appropriate controller + commands + +ipc_controld.c:reply_expected() wrongly omitted PCMK__CONTROLD_CMD_NODES (which +hasn't been a problem because crm_node uses a mainloop instead of sync dispatch +for 
that) and CRM_OP_RM_NODE_CACHE (which can be sent via +ipc_client.c:pcmk_ipc_purge_node()). + +Because CRM_OP_RM_NODE_CACHE gets only an ack and no further replies, we now +have to be careful not to return true from the controller's dispatch() +function, otherwise crm_node -R would wait forever for more data. That means +we have to check for whether any replies are expected, which means we have to +increment expected replies *before* sending a request (in case it's sync). + +Regression introduced in 2.0.5 by ae14fa4a + +Fixes T681 +--- + lib/common/ipc_controld.c | 49 ++++++++++++++------------------------- + 1 file changed, 17 insertions(+), 32 deletions(-) + +diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c +index 3c3a98964..405fd0518 100644 +--- a/lib/common/ipc_controld.c ++++ b/lib/common/ipc_controld.c +@@ -177,18 +177,16 @@ set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) + static bool + reply_expected(pcmk_ipc_api_t *api, xmlNode *request) + { +- const char *command = crm_element_value(request, F_CRM_TASK); +- +- if (command == NULL) { +- return false; +- } +- +- // We only need to handle commands that functions in this file can send +- return !strcmp(command, CRM_OP_REPROBE) +- || !strcmp(command, CRM_OP_NODE_INFO) +- || !strcmp(command, CRM_OP_PING) +- || !strcmp(command, CRM_OP_LRM_FAIL) +- || !strcmp(command, CRM_OP_LRM_DELETE); ++ // We only need to handle commands that API functions can send ++ return pcmk__str_any_of(crm_element_value(request, F_CRM_TASK), ++ PCMK__CONTROLD_CMD_NODES, ++ CRM_OP_LRM_DELETE, ++ CRM_OP_LRM_FAIL, ++ CRM_OP_NODE_INFO, ++ CRM_OP_PING, ++ CRM_OP_REPROBE, ++ CRM_OP_RM_NODE_CACHE, ++ NULL); + } + + static bool +@@ -202,22 +200,12 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + pcmk_controld_reply_unknown, NULL, NULL, + }; + +- /* If we got an ACK, return true so the caller knows to expect more responses +- * from the IPC server. 
We do this before decrementing replies_expected because +- * ACKs are not going to be included in that value. +- * +- * Note that we cannot do the same kind of status checking here that we do in +- * ipc_pacemakerd.c. The ACK message we receive does not necessarily contain +- * a status attribute. That is, we may receive this: +- * +- * +- * +- * Instead of this: +- * +- * +- */ + if (pcmk__str_eq(crm_element_name(reply), "ack", pcmk__str_none)) { +- return true; // More replies needed ++ /* ACKs are trivial responses that do not count toward expected replies, ++ * and do not have all the fields that validation requires, so skip that ++ * processing. ++ */ ++ return private->replies_expected > 0; + } + + if (private->replies_expected > 0) { +@@ -344,18 +332,15 @@ static int + send_controller_request(pcmk_ipc_api_t *api, xmlNode *request, + bool reply_is_expected) + { +- int rc; +- + if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) { + return EINVAL; + } +- rc = pcmk__send_ipc_request(api, request); +- if ((rc == pcmk_rc_ok) && reply_is_expected) { ++ if (reply_is_expected) { + struct controld_api_private_s *private = api->api_data; + + private->replies_expected++; + } +- return rc; ++ return pcmk__send_ipc_request(api, request); + } + + static xmlNode * +-- +2.41.0 + diff --git a/SOURCES/006-watchdog-fencing-topology.patch b/SOURCES/006-watchdog-fencing-topology.patch deleted file mode 100644 index 7651584..0000000 --- a/SOURCES/006-watchdog-fencing-topology.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 17cc49e1564b0ae55cc8212d14c5c055f88040da Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 14 Feb 2023 15:35:37 +0100 -Subject: [PATCH] Fix: watchdog-fencing: terminate dangling timer before - watchdog-waiting - ---- - daemons/fenced/fenced_remote.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index 5c3fe25e3..aab185adb 100644 ---- 
a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2009-2022 the Pacemaker project contributors -+ * Copyright 2009-2023 the Pacemaker project contributors - * - * The version control history for this file may have further details. - * -@@ -1702,6 +1702,10 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op) - "client %s " CRM_XS " id=%.8s", - (stonith_watchdog_timeout_ms / 1000), - op->target, op->action, op->client_name, op->id); -+ -+ if (op->op_timer_one) { -+ g_source_remove(op->op_timer_one); -+ } - op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, - remote_op_watchdog_done, op); - return TRUE; --- -2.39.0 - -From f2cc2a4277124230903a18713e50604a8f1842cd Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 1 Mar 2023 15:00:15 +0100 -Subject: [PATCH] Refactor: watchdog-fencing: convenience function - pcmk__is_fencing_action - -for consistency and add comment making clear why this block exits -with new timer set in any case ---- - daemons/fenced/fenced_remote.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index aab185adb..e0f8de057 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1834,7 +1834,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - if (!((stonith_watchdog_timeout_ms > 0) - && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) - || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) -- && !pcmk__str_eq(op->action, "on", pcmk__str_none))) -+ && pcmk__is_fencing_action(op->action))) - && check_watchdog_fencing_and_wait(op))) { - - /* Some thoughts about self-fencing cases reaching this point: -@@ -1854,6 +1854,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) - Otherwise the selection of stonith-watchdog-timeout at - least is questionable. 
- */ -+ -+ /* coming here we're not waiting for watchdog timeout - -+ thus engage timer with timout evaluated before */ - op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); - } - --- -2.39.0 - -From c4eb45a986f8865fc5e69350fd5b9f4b056d9d69 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 14 Feb 2023 11:57:17 +0100 -Subject: [PATCH] Fix: watchdog-fencing: correctly derive timeout with topology - -up to now the timeout for watchdog-fencing was just added to -the overall timeout if the node to be fenced was visible and -reported back to the query. ---- - daemons/fenced/fenced_remote.c | 28 +++++++++++++++++++++++++--- - 1 file changed, 25 insertions(+), 3 deletions(-) - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index e0f8de057..3b7ab05e9 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -969,8 +969,9 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok) - return pcmk_rc_ok; - } - -- crm_info("All fencing options targeting %s for client %s@%s failed " -+ crm_info("All %sfencing options targeting %s for client %s@%s failed " - CRM_XS " id=%.8s", -+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", - op->target, op->client_name, op->originator, op->id); - return ENODEV; - } -@@ -1434,8 +1435,17 @@ stonith_choose_peer(remote_fencing_op_t * op) - && pcmk_is_set(op->call_options, st_opt_topology) - && (advance_topology_level(op, false) == pcmk_rc_ok)); - -- crm_notice("Couldn't find anyone to fence (%s) %s using %s", -- op->action, op->target, (device? 
device : "any device")); -+ if ((stonith_watchdog_timeout_ms > 0) -+ && pcmk__is_fencing_action(op->action) -+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) -+ && node_does_watchdog_fencing(op->target)) { -+ crm_info("Couldn't contact watchdog-fencing target-node (%s)", -+ op->target); -+ /* check_watchdog_fencing_and_wait will log additional info */ -+ } else { -+ crm_notice("Couldn't find anyone to fence (%s) %s using %s", -+ op->action, op->target, (device? device : "any device")); -+ } - return NULL; - } - -@@ -1531,6 +1541,18 @@ get_op_total_timeout(const remote_fencing_op_t *op, - continue; - } - for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { -+ /* in case of watchdog-device we add the timeout to the budget -+ regardless of if we got a reply or not -+ */ -+ if ((stonith_watchdog_timeout_ms > 0) -+ && pcmk__is_fencing_action(op->action) -+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID, -+ pcmk__str_none) -+ && node_does_watchdog_fencing(op->target)) { -+ total_timeout += stonith_watchdog_timeout_ms / 1000; -+ continue; -+ } -+ - for (iter = op->query_results; iter != NULL; iter = iter->next) { - const peer_device_info_t *peer = iter->data; - --- -2.39.0 - diff --git a/SOURCES/007-glib-assertions.patch b/SOURCES/007-glib-assertions.patch new file mode 100644 index 0000000..5679ee6 --- /dev/null +++ b/SOURCES/007-glib-assertions.patch @@ -0,0 +1,163 @@ +From 63f4bd4d5a324e6eb279340a42c7c36c8902ada7 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 2 Aug 2023 15:55:26 -0500 +Subject: [PATCH 1/4] Fix: controller: don't try to execute agent action at + shutdown + +Normally, agent execution is not possible at shutdown. However, when metadata +is needed for some action, the agent can be called asynchronously, and when the +metadata action returns, the original action is performed. 
If the metadata is +initiated before shutdown, but completes after shutdown has begun, do not try +to attempt the original action, so we avoid unnecessary error logs. +--- + daemons/controld/controld_execd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 530e4346c8..a90e8d833e 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -1400,7 +1400,9 @@ metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) + md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, + result->action_stdout); + } +- do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); ++ if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { ++ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); ++ } + free_metadata_cb_data(data); + } + + +From 247d9534f36f690c1474e36cedaadb3934022a05 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 2 Aug 2023 16:16:31 -0500 +Subject: [PATCH 2/4] Refactor: controller: de-functionize lrm_state_destroy() + +It was a one-liner called once +--- + daemons/controld/controld_execd_state.c | 8 +------- + daemons/controld/controld_lrm.h | 5 ----- + 2 files changed, 1 insertion(+), 12 deletions(-) + +diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c +index 8c68bfca08..4a87a9b332 100644 +--- a/daemons/controld/controld_execd_state.c ++++ b/daemons/controld/controld_execd_state.c +@@ -132,12 +132,6 @@ lrm_state_create(const char *node_name) + return state; + } + +-void +-lrm_state_destroy(const char *node_name) +-{ +- g_hash_table_remove(lrm_state_table, node_name); +-} +- + static gboolean + remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) + { +@@ -799,7 +793,7 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state, + } + + if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { +- lrm_state_destroy(rsc_id); 
++ g_hash_table_remove(lrm_state_table, rsc_id); + return pcmk_ok; + } + +diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h +index 25f3db3316..c3113e49c3 100644 +--- a/daemons/controld/controld_lrm.h ++++ b/daemons/controld/controld_lrm.h +@@ -108,11 +108,6 @@ gboolean lrm_state_init_local(void); + */ + void lrm_state_destroy_all(void); + +-/*! +- * \brief Destroy executor connection by node name +- */ +-void lrm_state_destroy(const char *node_name); +- + /*! + * \brief Find lrm_state data by node name + */ + +From 1b915f1ce38756431f7faa142565e3e07aade194 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 2 Aug 2023 15:58:09 -0500 +Subject: [PATCH 3/4] Low: controller: guard lrm_state_table usage with NULL + check + +It is NULL while draining the mainloop during the shutdown sequence. +--- + daemons/controld/controld_execd_state.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c +index 4a87a9b332..b90cc5e635 100644 +--- a/daemons/controld/controld_execd_state.c ++++ b/daemons/controld/controld_execd_state.c +@@ -301,7 +301,7 @@ lrm_state_destroy_all(void) + lrm_state_t * + lrm_state_find(const char *node_name) + { +- if (!node_name) { ++ if ((node_name == NULL) || (lrm_state_table == NULL)) { + return NULL; + } + return g_hash_table_lookup(lrm_state_table, node_name); +@@ -312,6 +312,8 @@ lrm_state_find_or_create(const char *node_name) + { + lrm_state_t *lrm_state; + ++ CRM_CHECK(lrm_state_table != NULL, return NULL); ++ + lrm_state = g_hash_table_lookup(lrm_state_table, node_name); + if (!lrm_state) { + lrm_state = lrm_state_create(node_name); +@@ -323,6 +325,9 @@ lrm_state_find_or_create(const char *node_name) + GList * + lrm_state_get_list(void) + { ++ if (lrm_state_table == NULL) { ++ return NULL; ++ } + return g_hash_table_get_values(lrm_state_table); + } + + +From 78581213ed3bf4183b0ec1f391b720d5d91f3f68 Mon Sep 17 
00:00:00 2001 +From: Ken Gaillot +Date: Wed, 2 Aug 2023 15:48:36 -0500 +Subject: [PATCH 4/4] Log: controller: improve messages for resource history + updates + +--- + daemons/controld/controld_cib.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c +index 22ac42486f..c9dde0b748 100644 +--- a/daemons/controld/controld_cib.c ++++ b/daemons/controld/controld_cib.c +@@ -861,10 +861,17 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use + case pcmk_ok: + case -pcmk_err_diff_failed: + case -pcmk_err_diff_resync: +- crm_trace("Resource update %d complete: rc=%d", call_id, rc); ++ crm_trace("Resource history update completed (call=%d rc=%d)", ++ call_id, rc); + break; + default: +- crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); ++ if (call_id > 0) { ++ crm_warn("Resource history update %d failed: %s " ++ CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); ++ } else { ++ crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", ++ pcmk_strerror(rc), rc); ++ } + } + + if (call_id == pending_rsc_update) { diff --git a/SOURCES/008-attrd-shutdown.patch b/SOURCES/008-attrd-shutdown.patch new file mode 100644 index 0000000..1d02526 --- /dev/null +++ b/SOURCES/008-attrd-shutdown.patch @@ -0,0 +1,45 @@ +From f5263c9401c9c38d4e039149deddcc0da0c184ba Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 3 Aug 2023 12:17:08 -0500 +Subject: [PATCH] Fix: attrd: avoid race condition when shutting down + +This addresses a race condition that can occur when the DC and the attribute +writer are different nodes, and shutting down at the same time. When the DC +controller leaves its Corosync process group, the remaining nodes erase its +transient node attributes (including "shutdown") from the CIB. 
However if the +(former) DC's attrd is still up, it can win the attribute writer election +called after the original writer leaves. As the election winner, it writes out +all its attributes to the CIB, including "shutdown". The next time it rejoins +the cluster, it will be immediately shut down. + +Fixes T138 +--- + daemons/attrd/attrd_elections.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index 3b6b55a0f59..6f4916888a9 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -22,12 +22,20 @@ attrd_election_cb(gpointer user_data) + { + attrd_declare_winner(); + ++ if (attrd_requesting_shutdown() || attrd_shutting_down()) { ++ /* This node is shutting down or about to, meaning its attributes will ++ * be removed (and may have already been removed from the CIB by a ++ * controller). Don't sync or write its attributes in this case. ++ */ ++ return G_SOURCE_REMOVE; ++ } ++ + /* Update the peers after an election */ + attrd_peer_sync(NULL, NULL); + + /* Update the CIB after an election */ + attrd_write_attributes(true, false); +- return FALSE; ++ return G_SOURCE_REMOVE; + } + + void diff --git a/SOURCES/009-attrd-shutdown-2.patch b/SOURCES/009-attrd-shutdown-2.patch new file mode 100644 index 0000000..ba79a62 --- /dev/null +++ b/SOURCES/009-attrd-shutdown-2.patch @@ -0,0 +1,210 @@ +From 83e547cc64f2586031a007ab58e91fc22cd1a68a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 24 Aug 2023 12:18:23 -0500 +Subject: [PATCH] Refactor: attrd: use enum instead of bools for + attrd_write_attributes() + +--- + daemons/attrd/attrd_cib.c | 24 ++++++++++++++++++------ + daemons/attrd/attrd_corosync.c | 2 +- + daemons/attrd/attrd_elections.c | 2 +- + daemons/attrd/attrd_ipc.c | 2 +- + daemons/attrd/attrd_utils.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 8 +++++++- + 6 files changed, 29 insertions(+), 11 deletions(-) + +diff --git 
a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 928c0133745..9c787fe1024 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -343,16 +343,23 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay) + free_xml(xml_top); + } + ++/*! ++ * \internal ++ * \brief Write out attributes ++ * ++ * \param[in] options Group of enum attrd_write_options ++ */ + void +-attrd_write_attributes(bool all, bool ignore_delay) ++attrd_write_attributes(uint32_t options) + { + GHashTableIter iter; + attribute_t *a = NULL; + +- crm_debug("Writing out %s attributes", all? "all" : "changed"); ++ crm_debug("Writing out %s attributes", ++ pcmk_is_set(options, attrd_write_all)? "all" : "changed"); + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { +- if (!all && a->unknown_peer_uuids) { ++ if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { + // Try writing this attribute again, in case peer ID was learned + a->changed = true; + } else if (a->force_write) { +@@ -360,9 +367,14 @@ attrd_write_attributes(bool all, bool ignore_delay) + a->changed = true; + } + +- if(all || a->changed) { +- /* When forced write flag is set, ignore delay. */ +- attrd_write_attribute(a, (a->force_write ? 
true : ignore_delay)); ++ if (pcmk_is_set(options, attrd_write_all) || a->changed) { ++ bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); ++ ++ if (a->force_write) { ++ // Always ignore delay when forced write flag is set ++ ignore_delay = true; ++ } ++ attrd_write_attribute(a, ignore_delay); + } else { + crm_trace("Skipping unchanged attribute %s", a->id); + } +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1aec35a054e..49631df6e44 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -285,7 +285,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host) + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +- attrd_write_attributes(false, false); ++ attrd_write_attributes(attrd_write_changed); + } + } + +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index c25a41a4492..01341db18e4 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -34,7 +34,7 @@ attrd_election_cb(gpointer user_data) + attrd_peer_sync(NULL, NULL); + + /* Update the CIB after an election */ +- attrd_write_attributes(true, false); ++ attrd_write_attributes(attrd_write_all); + return G_SOURCE_REMOVE; + } + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 4be789de7f9..05c4a696a19 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -232,7 +232,7 @@ attrd_client_refresh(pcmk__request_t *request) + crm_info("Updating all attributes"); + + attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); +- attrd_write_attributes(true, true); ++ attrd_write_attributes(attrd_write_all|attrd_write_no_delay); + + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; +diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c +index c43eac1695a..bfd51368890 100644 +--- 
a/daemons/attrd/attrd_utils.c ++++ b/daemons/attrd/attrd_utils.c +@@ -156,7 +156,7 @@ attrd_cib_replaced_cb(const char *event, xmlNode * msg) + if (attrd_election_won()) { + if (change_section & (cib_change_section_nodes | cib_change_section_status)) { + crm_notice("Updating all attributes after %s event", event); +- attrd_write_attributes(true, false); ++ attrd_write_attributes(attrd_write_all); + } + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 41f31d97b3b..2d781d11394 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -176,8 +176,14 @@ void attrd_free_attribute(gpointer data); + void attrd_free_attribute_value(gpointer data); + attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); + ++enum attrd_write_options { ++ attrd_write_changed = 0, ++ attrd_write_all = (1 << 0), ++ attrd_write_no_delay = (1 << 1), ++}; ++ + void attrd_write_attribute(attribute_t *a, bool ignore_delay); +-void attrd_write_attributes(bool all, bool ignore_delay); ++void attrd_write_attributes(uint32_t options); + void attrd_write_or_elect_attribute(attribute_t *a); + + extern int minimum_protocol_version; +From 58400e272cfc51f02eec69cdd0ed0d27a30e78a3 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 24 Aug 2023 12:27:53 -0500 +Subject: [PATCH] Fix: attrd: avoid race condition at writer election + +f5263c94 was not a complete fix. The issue may also occur if a remaining node +(not the original DC or writer) wins the attribute writer election after the +original DC's controller has exited but before its attribute manger has exited. + +The long-term solution will be to have the attribute manager (instead of the +controller) be in control of erasing transient attributes from the CIB when a +node leaves. This short-term workaround simply has new attribute writers skip +shutdown attributes when writing out all attributes. 
+ +Fixes T138 +--- + daemons/attrd/attrd_cib.c | 5 +++++ + daemons/attrd/attrd_elections.c | 14 ++++++++++++-- + daemons/attrd/pacemaker-attrd.h | 1 + + 3 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 9c787fe102..2c910b4c64 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -359,6 +359,11 @@ attrd_write_attributes(uint32_t options) + pcmk_is_set(options, attrd_write_all)? "all" : "changed"); + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { ++ if (pcmk_is_set(options, attrd_write_skip_shutdown) ++ && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { ++ continue; ++ } ++ + if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { + // Try writing this attribute again, in case peer ID was learned + a->changed = true; +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index 01341db18e..a95cd44cbd 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -33,8 +33,18 @@ attrd_election_cb(gpointer user_data) + /* Update the peers after an election */ + attrd_peer_sync(NULL, NULL); + +- /* Update the CIB after an election */ +- attrd_write_attributes(attrd_write_all); ++ /* After winning an election, update the CIB with the values of all ++ * attributes as the winner knows them. ++ * ++ * However, do not write out any "shutdown" attributes. A node that is ++ * shutting down will have all its transient attributes removed from the CIB ++ * when its controller exits, and from the attribute manager's memory (on ++ * remaining nodes) when its attribute manager exits; if an election is won ++ * between when those two things happen, we don't want to write the shutdown ++ * attribute back out, which would cause the node to immediately shut down ++ * the next time it rejoins. 
++ */ ++ attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown); + return G_SOURCE_REMOVE; + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 2d781d1139..2e35bd7ec5 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -180,6 +180,7 @@ enum attrd_write_options { + attrd_write_changed = 0, + attrd_write_all = (1 << 0), + attrd_write_no_delay = (1 << 1), ++ attrd_write_skip_shutdown = (1 << 2), + }; + + void attrd_write_attribute(attribute_t *a, bool ignore_delay); diff --git a/SOURCES/010-revert-58400e27.patch b/SOURCES/010-revert-58400e27.patch new file mode 100644 index 0000000..d08ff17 --- /dev/null +++ b/SOURCES/010-revert-58400e27.patch @@ -0,0 +1,62 @@ +From 2e81e0db9a716c486805e0760f78be65ca79eeae Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 17 Oct 2023 15:28:27 -0500 +Subject: [PATCH] Fix: attrd: avoid regression by reverting 58400e27 + +Fixes T714 +--- + daemons/attrd/attrd_cib.c | 5 ----- + daemons/attrd/attrd_elections.c | 10 +--------- + daemons/attrd/pacemaker-attrd.h | 1 - + 3 files changed, 1 insertion(+), 15 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 2de37a7cb6..9ce2872715 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -641,11 +641,6 @@ attrd_write_attributes(uint32_t options) + pcmk_is_set(options, attrd_write_all)? 
"all" : "changed"); + g_hash_table_iter_init(&iter, attributes); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { +- if (pcmk_is_set(options, attrd_write_skip_shutdown) +- && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { +- continue; +- } +- + if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { + // Try writing this attribute again, in case peer ID was learned + a->changed = true; +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index a95cd44cbd..62310ed1d8 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -35,16 +35,8 @@ attrd_election_cb(gpointer user_data) + + /* After winning an election, update the CIB with the values of all + * attributes as the winner knows them. +- * +- * However, do not write out any "shutdown" attributes. A node that is +- * shutting down will have all its transient attributes removed from the CIB +- * when its controller exits, and from the attribute manager's memory (on +- * remaining nodes) when its attribute manager exits; if an election is won +- * between when those two things happen, we don't want to write the shutdown +- * attribute back out, which would cause the node to immediately shut down +- * the next time it rejoins. 
+ */ +- attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown); ++ attrd_write_attributes(attrd_write_all); + return G_SOURCE_REMOVE; + } + +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index e3c369b5bc..a95bb54367 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -181,7 +181,6 @@ enum attrd_write_options { + attrd_write_changed = 0, + attrd_write_all = (1 << 0), + attrd_write_no_delay = (1 << 1), +- attrd_write_skip_shutdown = (1 << 2), + }; + + void attrd_write_attribute(attribute_t *a, bool ignore_delay); diff --git a/SOURCES/011-revert-f5263c94.patch b/SOURCES/011-revert-f5263c94.patch new file mode 100644 index 0000000..c0f1c03 --- /dev/null +++ b/SOURCES/011-revert-f5263c94.patch @@ -0,0 +1,34 @@ +From 14b87a38786ae5b4dc12fc1581e5d39a274fced2 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 30 Oct 2023 12:21:24 -0500 +Subject: [PATCH] Fix: attrd: revert faulty T138 fix + +f5263c9401 created a timing issue where a node could get a shutdown attribute, +the original writer leaves the cluster before writing it out, then the +shutting-down node wins the writer election. In that case, it would skip the +write-out and the scheduler would never shut it down. + +Reopens T138 +--- + daemons/attrd/attrd_elections.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index 62310ed1d8..82fbe8affc 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -22,14 +22,6 @@ + { + attrd_declare_winner(); + +- if (attrd_requesting_shutdown() || attrd_shutting_down()) { +- /* This node is shutting down or about to, meaning its attributes will +- * be removed (and may have already been removed from the CIB by a +- * controller). Don't sync or write its attributes in this case. 
+- */ +- return G_SOURCE_REMOVE; +- } +- + /* Update the peers after an election */ + attrd_peer_sync(NULL, NULL); + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 7da7a5d..51c0706 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -35,11 +35,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.1.5 -%global specversion 9 +%global pcmkversion 2.1.6 +%global specversion 10 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 +%global commit 6fdc9deea294bbad629b003c6ae036aaed8e3ee0 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. @@ -232,8 +232,8 @@ Name: pacemaker Summary: Scalable High-Availability cluster resource manager Version: %{pcmkversion} -Release: %{pcmk_release}%{?dist} -License: GPLv2+ and LGPLv2+ +Release: %{pcmk_release}.1%{?dist} +License: GPL-2.0-or-later AND LGPL-2.1-or-later Url: https://www.clusterlabs.org/ # Example: https://codeload.github.com/ClusterLabs/pacemaker/tar.gz/e91769e @@ -248,17 +248,25 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} # upstream commits -Patch001: 001-sync-points.patch -Patch002: 002-remote-regression.patch -Patch003: 003-history-cleanup.patch -Patch004: 004-g_source_remove.patch -Patch005: 005-query-null.patch -Patch006: 006-watchdog-fencing-topology.patch +Patch001: 001-remote-start-state.patch +Patch002: 002-group-colocation-constraint.patch +Patch003: 003-clone-shuffle.patch +Patch004: 004-clone-rsc-display.patch +Patch005: 005-attrd-dampen.patch +Patch006: 006-controller-reply.patch +Patch007: 
007-glib-assertions.patch +Patch008: 008-attrd-shutdown.patch +Patch009: 009-attrd-shutdown-2.patch +Patch010: 010-revert-58400e27.patch +Patch011: 011-revert-f5263c94.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} +%if %{with stonithd} +Requires: %{python_name}-%{name} = %{version}-%{release} +%endif %{?systemd_requires} %if %{defined centos} @@ -271,6 +279,7 @@ ExclusiveArch: aarch64 i686 ppc64le s390x x86_64 Requires: %{python_path} BuildRequires: %{python_name}-devel +BuildRequires: %{python_name}-setuptools # Pacemaker requires a minimum libqb functionality # RHEL requires a higher version than upstream, for qb_ipcc_connect_async() @@ -360,7 +369,7 @@ Available rpmbuild rebuild options: stonithd %package cli -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Command line tools for controlling Pacemaker clusters Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} %if 0%{?supports_recommends} @@ -383,7 +392,7 @@ to query and control the cluster from machines that may, or may not, be part of the cluster. %package -n %{pkgname_pcmk_libs} -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Core Pacemaker libraries Requires(pre): %{pkgname_shadow_utils} Requires: %{name}-schemas = %{version}-%{release} @@ -400,7 +409,7 @@ The %{pkgname_pcmk_libs} package contains shared libraries needed for cluster nodes and those just running the CLI tools. %package cluster-libs -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Cluster Libraries used by Pacemaker Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -411,8 +420,22 @@ manager. The %{name}-cluster-libs package contains cluster-aware shared libraries needed for nodes that will form part of the cluster nodes. 
+%package -n %{python_name}-%{name} +License: LGPL-2.1-or-later +Summary: Python libraries for Pacemaker +Requires: %{python_path} +Requires: %{pkgname_pcmk_libs} = %{version}-%{release} +BuildArch: noarch + +%description -n %{python_name}-%{name} +Pacemaker is an advanced, scalable High-Availability cluster resource +manager. + +The %{python_name}-%{name} package contains a Python library that can be used +to interface with Pacemaker. + %package remote -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Pacemaker remote executor daemon for non-cluster nodes Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} @@ -431,7 +454,7 @@ which is capable of extending pacemaker functionality to remote nodes not running the full corosync/cluster stack. %package -n %{pkgname_pcmk_libs}-devel -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Pacemaker development package Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} Requires: %{name}-cluster-libs%{?_isa} = %{version}-%{release} @@ -454,11 +477,12 @@ The %{pkgname_pcmk_libs}-devel package contains headers and shared libraries for developing tools for Pacemaker. %package cts -License: GPLv2+ and LGPLv2+ +License: GPL-2.0-or-later AND LGPL-2.1-or-later Summary: Test framework for cluster-related technologies like Pacemaker Requires: %{python_path} Requires: %{pkgname_pcmk_libs} = %{version}-%{release} Requires: %{name}-cli = %{version}-%{release} +Requires: %{python_name}-%{name} = %{version}-%{release} Requires: %{pkgname_procps} Requires: psmisc Requires: %{python_name}-psutil @@ -486,7 +510,7 @@ Pacemaker is an advanced, scalable High-Availability cluster resource manager. 
%package schemas -License: GPLv2+ +License: GPL-2.0-or-later Summary: Schemas and upgrade stylesheets for Pacemaker BuildArch: noarch @@ -558,6 +582,10 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}" make %{_smp_mflags} V=1 +pushd python +%py3_build +popd + %check make %{_smp_mflags} check { cts/cts-scheduler --run load-stopped-loop \ @@ -575,6 +603,10 @@ make install \ DESTDIR=%{buildroot} V=1 docdir=%{pcmk_docdir} \ %{?_python_bytecompile_extra:%{?py_byte_compile:am__py_compile=true}} +pushd python +%py3_install +popd + mkdir -p %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata for file in $(find %{nagios_name}-%{nagios_hash}/metadata -type f); do install -m 644 $file %{buildroot}%{_datadir}/pacemaker/nagios/plugins-metadata @@ -759,19 +791,22 @@ exit 0 %dir %{ocf_root}/resource.d %{ocf_root}/resource.d/pacemaker -%doc %{_mandir}/man7/* +%doc %{_mandir}/man7/*pacemaker* %exclude %{_mandir}/man7/pacemaker-controld.* %exclude %{_mandir}/man7/pacemaker-schedulerd.* %exclude %{_mandir}/man7/pacemaker-fenced.* %exclude %{_mandir}/man7/ocf_pacemaker_controld.* %exclude %{_mandir}/man7/ocf_pacemaker_o2cb.* %exclude %{_mandir}/man7/ocf_pacemaker_remote.* -%doc %{_mandir}/man8/* +%doc %{_mandir}/man8/crm*.8.gz %exclude %{_mandir}/man8/crm_master.* -%exclude %{_mandir}/man8/fence_legacy.* -%exclude %{_mandir}/man8/fence_watchdog.* -%exclude %{_mandir}/man8/pacemakerd.* -%exclude %{_mandir}/man8/pacemaker-remoted.* +%doc %{_mandir}/man8/attrd_updater.* +%doc %{_mandir}/man8/cibadmin.* +%if %{with cibsecrets} + %doc %{_mandir}/man8/cibsecret.* +%endif +%doc %{_mandir}/man8/iso8601.* +%doc %{_mandir}/man8/stonith_admin.* %license licenses/GPLv2 %doc COPYING @@ -802,6 +837,14 @@ exit 0 %doc COPYING %doc ChangeLog +%files -n %{python_name}-%{name} +%{python3_sitelib}/pacemaker/ +%{python3_sitelib}/pacemaker-*.egg-info +%exclude %{python3_sitelib}/pacemaker/_cts/ +%license licenses/LGPLv2.1 +%doc COPYING +%doc ChangeLog + %files remote %config(noreplace) 
%{_sysconfdir}/sysconfig/pacemaker # state directory is shared between the subpackets @@ -823,6 +866,7 @@ exit 0 %files cts %{python_site}/cts +%{python3_sitelib}/pacemaker/_cts/ %{_datadir}/pacemaker/tests %{_libexecdir}/pacemaker/cts-log-watcher @@ -834,8 +878,16 @@ exit 0 %files -n %{pkgname_pcmk_libs}-devel %{_includedir}/pacemaker -%{_libdir}/*.so -%{_libdir}/pkgconfig/*.pc +%{_libdir}/libcib.so +%{_libdir}/liblrmd.so +%{_libdir}/libcrmservice.so +%{_libdir}/libcrmcommon.so +%{_libdir}/libpe_status.so +%{_libdir}/libpe_rules.so +%{_libdir}/libpacemaker.so +%{_libdir}/libstonithd.so +%{_libdir}/libcrmcluster.so +%{_libdir}/pkgconfig/*pacemaker*.pc %license licenses/LGPLv2.1 %doc COPYING %doc ChangeLog @@ -856,6 +908,55 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Tue Oct 31 2023 Chris Lumens - 2.1.6-10.1 +- Revert the rest of the attrd shutdown race condition fix +- Related: RHEL-14044 + +* Thu Oct 19 2023 Chris Lumens - 2.1.6-10 +- Avoid an error if the elected attrd is on a node that is shutting down +- Resolves: RHEL-14044 + +* Mon Aug 28 2023 Chris Lumens - 2.1.6-9 +- Fix an additional shutdown race between attrd and the controller +- Related: rhbz2228933 + +* Tue Aug 8 2023 Chris Lumens - 2.1.6-8 +- Fix attrd race condition when shutting down +- Resolves: rhbz2228933 + +* Thu Jul 27 2023 Chris Lumens - 2.1.6-7 +- Wait for a reply from various controller commands +- Resolves: rhbz2221084 +- Related: rhbz2189301 + +* Mon Jul 24 2023 Chris Lumens - 2.1.6-6 +- Apply dampening when creating attributes with attrd_updater -U +- Resolves: rhbz2224051 +- Related: rhbz2189301 + +* Wed Jul 19 2023 Chris Lumens - 2.1.6-5 +- Clone instances should not shuffle unnecessarily +- Fix a bug in clone resource description display +- Resolves: rhbz2222055 +- Related: rhbz2189301 + +* Fri Jun 30 2023 Chris Lumens - 2.1.6-4 +- Fix moving groups when there's a constraint for a single group member +- Resolves: rhbz2218218 +- Resolves: rhbz2189301 + +* Wed 
Jun 21 2023 Chris Lumens - 2.1.6-3 +- Support start state for Pacemaker Remote nodes +- Related: rhbz2182482 + +* Fri May 26 2023 Chris Lumens - 2.1.6-2 +- Rebase pacemaker on upstream 2.1.6 final release +- Related: rhbz2182482 + +* Tue May 23 2023 Chris Lumens - 2.1.6-1 +- Rebase on upstream 2.1.6-rc2 release +- Resolves: rhbz2182482 + * Wed May 17 2023 Klaus Wenninger - 2.1.5-9 - Rebuild with incremented release to allow a safe upgrade from c8s/rhel-8