import pacemaker-2.1.3-1.el8

This commit is contained in:
CentOS Sources 2022-06-01 18:11:03 +00:00 committed by Stepan Oksanichenko
parent b82d1e1842
commit 3a11e8a743
29 changed files with 41 additions and 22324 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
SOURCES/nagios-agents-metadata-105ab8a.tar.gz
SOURCES/pacemaker-ada5c3b.tar.gz
SOURCES/pacemaker-dff7c3a.tar.gz

View File

@ -1,2 +1,2 @@
ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz
f9fd69263d5b21446b530f9750c262f7b492cad4 SOURCES/pacemaker-ada5c3b.tar.gz
aa7a8aecfe487f051545845476fd83d493da0326 SOURCES/pacemaker-dff7c3a.tar.gz

View File

@ -1,230 +0,0 @@
From f5ffbaf1f537d3d5b00e594211cd322f97df51ac Mon Sep 17 00:00:00 2001
From: Grace Chin <gchin@redhat.com>
Date: Fri, 5 Nov 2021 11:39:39 -0400
Subject: [PATCH 1/3] Low: xml: clone acls schema in preparation for changes
---
xml/acls-3.8.rng | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
create mode 100644 xml/acls-3.8.rng
diff --git a/xml/acls-3.8.rng b/xml/acls-3.8.rng
new file mode 100644
index 000000000..0fe6eed96
--- /dev/null
+++ b/xml/acls-3.8.rng
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<grammar xmlns="http://relaxng.org/ns/structure/1.0"
+ datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+ <start>
+ <optional>
+ <ref name="element-acls"/>
+ </optional>
+ </start>
+
+ <define name="element-acls">
+ <element name="acls">
+ <zeroOrMore>
+ <choice>
+ <element name="acl_target">
+ <attribute name="id"><text/></attribute>
+ <zeroOrMore>
+ <element name="role">
+ <attribute name="id"><data type="IDREF"/></attribute>
+ </element>
+ </zeroOrMore>
+ </element>
+ <element name="acl_group">
+ <!-- Here 'id' is the name of a unix group -->
+ <attribute name="id"><data type="ID"/></attribute>
+ <zeroOrMore>
+ <element name="role">
+ <attribute name="id"><data type="IDREF"/></attribute>
+ </element>
+ </zeroOrMore>
+ </element>
+ <element name="acl_role">
+ <attribute name="id"><data type="ID"/></attribute>
+ <optional>
+ <attribute name="description"><text/></attribute>
+ </optional>
+ <zeroOrMore>
+ <ref name="element-permission"/>
+ </zeroOrMore>
+ </element>
+ </choice>
+ </zeroOrMore>
+ </element>
+ </define>
+
+ <define name="element-permission">
+ <element name="acl_permission">
+ <attribute name="id"><data type="ID"/></attribute>
+
+ <attribute name="kind">
+ <choice>
+ <value>read</value>
+ <value>write</value>
+ <value>deny</value>
+ </choice>
+ </attribute>
+
+ <choice>
+ <attribute name="xpath"><text/></attribute>
+ <!-- reference is already sufficiently specific without 'object-type' -->
+ <attribute name="reference"><data type="IDREF"/></attribute>
+ <group>
+ <!-- Use 'object-type' to avoid conflicting with the 'tag' configuration concept -->
+ <attribute name="object-type"><text/></attribute>
+ <optional>
+ <!--
+ does not make sense with anything other than object-type
+ xpath and reference are already sufficiently specific
+ -->
+ <attribute name="attribute"><text/></attribute>
+ </optional>
+ </group>
+ </choice>
+
+ <optional>
+ <attribute name="description"><text/></attribute>
+ </optional>
+ </element>
+ </define>
+
+</grammar>
--
2.27.0
From 7838213fc639236bdedf5f15320152d973f1bdad Mon Sep 17 00:00:00 2001
From: Grace Chin <gchin@redhat.com>
Date: Fri, 5 Nov 2021 11:40:48 -0400
Subject: [PATCH 2/3] Add a 'name' attribute to acl_target and acl_group
elements
---
xml/acls-3.8.rng | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/xml/acls-3.8.rng b/xml/acls-3.8.rng
index 0fe6eed96..48bcdffe3 100644
--- a/xml/acls-3.8.rng
+++ b/xml/acls-3.8.rng
@@ -13,6 +13,9 @@
<choice>
<element name="acl_target">
<attribute name="id"><text/></attribute>
+ <optional>
+ <attribute name="name"><text/></attribute>
+ </optional>
<zeroOrMore>
<element name="role">
<attribute name="id"><data type="IDREF"/></attribute>
@@ -22,6 +25,9 @@
<element name="acl_group">
<!-- Here 'id' is the name of a unix group -->
<attribute name="id"><data type="ID"/></attribute>
+ <optional>
+ <attribute name="name"><text/></attribute>
+ </optional>
<zeroOrMore>
<element name="role">
<attribute name="id"><data type="IDREF"/></attribute>
--
2.27.0
From c3c498f4636f57e29670f8e385b625024ed222d7 Mon Sep 17 00:00:00 2001
From: Grace Chin <gchin@redhat.com>
Date: Fri, 5 Nov 2021 11:42:48 -0400
Subject: [PATCH 3/3] Changes made by run of 'cts/cts-cli -s'
---
cts/cli/regression.upgrade.exp | 7 +++++--
cts/cli/regression.validity.exp | 22 ++++++++++++++++++----
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/cts/cli/regression.upgrade.exp b/cts/cli/regression.upgrade.exp
index e38adebdd..7ce7ec13b 100644
--- a/cts/cli/regression.upgrade.exp
+++ b/cts/cli/regression.upgrade.exp
@@ -91,8 +91,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6
update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
update_validation debug: Configuration valid for schema: pacemaker-3.7
-update_validation trace: Stopping at pacemaker-3.7
-update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.7
+update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
+update_validation debug: Configuration valid for schema: pacemaker-3.8
+update_validation trace: Stopping at pacemaker-3.8
+update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.8
=#=#=#= Current cib after: Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping) =#=#=#=
<cib epoch="2" num_updates="0" admin_epoch="1">
<configuration>
diff --git a/cts/cli/regression.validity.exp b/cts/cli/regression.validity.exp
index 5ace430e7..125035a47 100644
--- a/cts/cli/regression.validity.exp
+++ b/cts/cli/regression.validity.exp
@@ -121,7 +121,11 @@ update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
update_validation trace: pacemaker-3.7 validation failed
-Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.7
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
+element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
+element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
+update_validation trace: pacemaker-3.8 validation failed
+Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.8
=#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Invalid configuration (78) =#=#=#=
* Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation)
=#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#=
@@ -226,7 +230,10 @@ update_validation trace: pacemaker-3.6 validation failed
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
element cib: Relax-NG validity error : Invalid attribute validate-with for element cib
update_validation trace: pacemaker-3.7 validation failed
-Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.7
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
+element cib: Relax-NG validity error : Invalid attribute validate-with for element cib
+update_validation trace: pacemaker-3.8 validation failed
+Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.8
=#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Invalid configuration (78) =#=#=#=
* Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with)
=#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#=
@@ -326,8 +333,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6
update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
update_validation debug: Configuration valid for schema: pacemaker-3.7
-update_validation trace: Stopping at pacemaker-3.7
-update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.7
+update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
+update_validation debug: Configuration valid for schema: pacemaker-3.8
+update_validation trace: Stopping at pacemaker-3.8
+update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.8
unpack_resources error: Resource start-up disabled since no STONITH resources have been defined
unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option
unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity
@@ -437,6 +447,8 @@ element rsc_order: Relax-NG validity error : Invalid attribute first-action for
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
+element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
+element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
=#=#=#= Current cib after: Make resulting CIB invalid, and without validate-with attribute =#=#=#=
<cib epoch="41" num_updates="0" admin_epoch="0" validate-with="none">
<configuration>
@@ -502,6 +514,8 @@ validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attrib
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
+validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
+validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
unpack_resources error: Resource start-up disabled since no STONITH resources have been defined
unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option
unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity
--
2.27.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,73 +0,0 @@
From 09ef95a2eed48b4eb7488788a1b655d67eafe783 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 30 Nov 2021 14:47:12 -0500
Subject: [PATCH] Low: libcrmservice: Handle systemd service templates.
These unit files (which have an @ sign at the end) expect to be
parameterized by an instance name. Not providing an instance name
causes the dbus lookup to fail, and we fall back to assume this is an
LSB service. If the user doesn't provide an instance name, just add a
fake one. It doesn't seem to matter what name is given for the lookup.
See: rhbz#2003151
---
lib/services/systemd.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index 8e9fff484..27a3b376d 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -206,17 +206,27 @@ systemd_unit_extension(const char *name)
}
static char *
-systemd_service_name(const char *name)
+systemd_service_name(const char *name, bool add_instance_name)
{
- if (name == NULL) {
+ if (pcmk__str_empty(name)) {
return NULL;
}
if (systemd_unit_extension(name)) {
return strdup(name);
- }
- return crm_strdup_printf("%s.service", name);
+ /* Services that end with an @ sign are systemd templates. They expect an
+ * instance name to follow the service name. If no instance name was
+ * provided, just add "x" to the string as the instance name. It doesn't
+ * seem to matter for purposes of looking up whether a service exists or
+ * not.
+ */
+ } else if (add_instance_name && *(name+strlen(name)-1) == '@') {
+ return crm_strdup_printf("%sx.service", name);
+
+ } else {
+ return crm_strdup_printf("%s.service", name);
+ }
}
static void
@@ -427,7 +437,7 @@ invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path)
CRM_ASSERT(msg != NULL);
// Add the (expanded) unit name as the argument
- name = systemd_service_name(arg_name);
+ name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name,
DBUS_TYPE_INVALID));
free(name);
@@ -944,7 +954,7 @@ invoke_unit_by_path(svc_action_t *op, const char *unit)
/* (ss) */
{
const char *replace_s = "replace";
- char *name = systemd_service_name(op->agent);
+ char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
--
2.27.0

File diff suppressed because it is too large Load Diff

View File

@ -1,143 +0,0 @@
From b52fe799c89637e2a761a5725c2376db5c05f2d1 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 30 Nov 2021 15:51:54 -0600
Subject: [PATCH 1/2] Low: resources: remove DOCTYPE from OCF 1.1-compliant
agents
OCF 1.1 replaced the DTD schema with RNG, but DOCTYPE still refers to the DTD.
There's no DOCTYPE for RNG, and DOCTYPE is optional, so just remove it.
---
extra/resources/Dummy | 3 +--
extra/resources/HealthIOWait | 3 +--
extra/resources/Stateful | 3 +--
extra/resources/attribute | 3 +--
extra/resources/ping | 3 +--
extra/resources/remote | 3 +--
6 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
index a344deac0..56584e564 100755
--- a/extra/resources/Dummy
+++ b/extra/resources/Dummy
@@ -58,8 +58,7 @@
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="Dummy" version="2.0">
+<resource-agent name="Dummy" version="2.1">
<version>1.1</version>
<longdesc lang="en">
diff --git a/extra/resources/HealthIOWait b/extra/resources/HealthIOWait
index 43a8b70c4..5f1483ef7 100755
--- a/extra/resources/HealthIOWait
+++ b/extra/resources/HealthIOWait
@@ -25,8 +25,7 @@
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="HealthIOWait" version="1.1">
+<resource-agent name="HealthIOWait" version="1.2">
<version>1.1</version>
<longdesc lang="en">
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
index ae3424bbf..0d2062d51 100755
--- a/extra/resources/Stateful
+++ b/extra/resources/Stateful
@@ -39,8 +39,7 @@ SCORE_PROMOTED=10
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="Stateful" version="1.1">
+<resource-agent name="Stateful" version="1.2">
<version>1.1</version>
<longdesc lang="en">
diff --git a/extra/resources/attribute b/extra/resources/attribute
index 1800dff8f..a2bd353e0 100755
--- a/extra/resources/attribute
+++ b/extra/resources/attribute
@@ -57,8 +57,7 @@ END
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="attribute" version="1.1">
+<resource-agent name="attribute" version="1.2">
<version>1.1</version>
<shortdesc lang="en">Manages a node attribute</shortdesc>
<longdesc lang="en">
diff --git a/extra/resources/ping b/extra/resources/ping
index 6e296979f..7cc6b802d 100755
--- a/extra/resources/ping
+++ b/extra/resources/ping
@@ -36,8 +36,7 @@
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="ping" version="1.1">
+<resource-agent name="ping" version="1.2">
<version>1.1</version>
<longdesc lang="en">
diff --git a/extra/resources/remote b/extra/resources/remote
index a53262bb6..f7e40dc81 100755
--- a/extra/resources/remote
+++ b/extra/resources/remote
@@ -24,8 +24,7 @@
meta_data() {
cat <<END
<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="remote" version="1.0">
+<resource-agent name="remote" version="1.1">
<version>1.1</version>
<shortdesc lang="en">Pacemaker Remote connection</shortdesc>
<parameters>
--
2.27.0
From 70f469120f8db6a024c786466ee74a6c7fbd1f43 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 30 Nov 2021 15:53:39 -0600
Subject: [PATCH 2/2] Fix: resources: use correct syntax in Stateful meta-data
The OCF standard only allows "0" or "1" for booleans.
This fixes incorrect ocf:pacemaker:Stateful meta-data syntax introduced by
7024398 as a regression in the 2.1.0 release.
---
extra/resources/Stateful | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
index 0d2062d51..2ebe6725f 100755
--- a/extra/resources/Stateful
+++ b/extra/resources/Stateful
@@ -57,7 +57,7 @@ Location to store the resource state in
<content type="string" default="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>
-<parameter name="envfile" reloadable="true">
+<parameter name="envfile" reloadable="1">
<longdesc lang="en">
If this is set, the environment will be dumped to this file for every call.
</longdesc>
@@ -65,7 +65,7 @@ If this is set, the environment will be dumped to this file for every call.
<content type="string" default="" />
</parameter>
-<parameter name="notify_delay" reloadable="true">
+<parameter name="notify_delay" reloadable="1">
<longdesc lang="en">
The notify action will sleep for this many seconds before returning,
to simulate a long-running notify.
--
2.27.0

View File

@ -1,39 +0,0 @@
From f491d9d5a7ed554fed985de356bb085fdec3421c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 7 Dec 2021 09:01:00 -0600
Subject: [PATCH] Fix: fencer: avoid memory leak when broadcasting history
differences
Regression introduced in 2.1.0 by dbc27b2
---
daemons/fenced/fenced_history.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index bc159383c..a9c57dc86 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -484,8 +484,6 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
!pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST,
msg, LOG_NEVER);
- GHashTable *received_history =
- history?stonith_xml_history_to_list(history):NULL;
/* either a broadcast created directly upon stonith-API request
* or a diff as response to such a thing
@@ -497,6 +495,11 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
if (!history ||
!crm_is_true(crm_element_value(history,
F_STONITH_DIFFERENTIAL))) {
+ GHashTable *received_history = NULL;
+
+ if (history != NULL) {
+ received_history = stonith_xml_history_to_list(history);
+ }
out_history =
stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
if (out_history) {
--
2.27.0

View File

@ -1,43 +0,0 @@
From 0339e89f3238b31df78b864dae8684b82c370741 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 13 Dec 2021 15:22:40 -0600
Subject: [PATCH] Fix: fencer: get current time correctly
f52bc8e1ce (2.1.2) introduced a regression by using clock_gettime() with
CLOCK_MONOTONIC to get the current time. Use qb_util_timespec_from_epoch_get()
instead (which as of this writing uses clock_gettime() with CLOCK_REALTIME if
available, and falls back to gettimeofday() if not).
---
daemons/fenced/fenced_commands.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index f34cb4f13..7685cb8c3 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2746,19 +2746,14 @@ bool fencing_peer_active(crm_node_t *peer)
return FALSE;
}
-void set_fencing_completed(remote_fencing_op_t * op)
+void
+set_fencing_completed(remote_fencing_op_t *op)
{
-#ifdef CLOCK_MONOTONIC
struct timespec tv;
- clock_gettime(CLOCK_MONOTONIC, &tv);
-
+ qb_util_timespec_from_epoch_get(&tv);
op->completed = tv.tv_sec;
op->completed_nsec = tv.tv_nsec;
-#else
- op->completed = time(NULL);
- op->completed_nsec = 0L;
-#endif
}
/*!
--
2.27.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,65 +0,0 @@
From ed8b2c86ab77aaa3d7fd688c049ad5e1b922a9c6 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 13 Jan 2022 02:56:55 -0800
Subject: [PATCH] Fix: liblrmd: Avoid double-free during notify operation
This commit fixes a regression introduced by 31c7fa8a, causing a
double-free in notify operations. lrmd_dispatch_internal() assigns the
exit_reason string directly from an XML node to a new lrmd_event_data_t
object (without duplicating), and this string gets freed twice.
Free #1: pcmk__create_history_xml() (reached via callback) calls
lrmd__set_result(), which frees event.exit_reason and sets it to NULL.
Free #2: lrmd_ipc_dispatch() frees the XML node, which contains a
pointer to the exit_reason string just freed, after
lrmd_dispatch_internal() returns.
Prior to 31c7fa8a, pcmk__create_history_xml reset event.rc and
event.op_status but **not** event.exit_reason.
In this commit we simply make a copy of event.exit_reason in
lrmd_dispatch_internal() before the callback. This way we don't have to
worry about whatever happens in the callback, and we can continue to
unset the exit_reason alongside the rc and op_status. The added overhead
should be minimal.
This commit also makes a copy of output. That's not strictly necessary
but adds some futureproofing and allows us to call lrmd__reset_result()
at the end of lrmd_dispatch_internal().
Resolves: RHBZ#2039675
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
lib/lrmd/lrmd_client.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
index ee31bb5ae9..5131a648b7 100644
--- a/lib/lrmd/lrmd_client.c
+++ b/lib/lrmd/lrmd_client.c
@@ -305,9 +305,10 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
event.type = lrmd_event_exec_complete;
- // No need to duplicate the memory, so don't use setter functions
- event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
- event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
+ /* output and exit_reason may be freed by a callback */
+ event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT);
+ lrmd__set_result(&event, event.rc, event.op_status,
+ crm_element_value(msg, F_LRMD_RSC_EXIT_REASON));
event.params = xml2list(msg);
} else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) {
@@ -324,6 +325,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
if (event.params) {
g_hash_table_destroy(event.params);
}
+ lrmd__reset_result(&event);
}
// \return Always 0, to indicate that IPC mainloop source should be kept
--
2.27.0

View File

@ -1,26 +0,0 @@
From 186d5a02fba919c455fd6eeb050b4be107f82159 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Thu, 13 Jan 2022 17:02:47 -0500
Subject: [PATCH] Low: scheduler: Use the old RC code to log maskable probe
failures.
---
lib/pengine/unpack.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 8a2d2a6d6d..b01f86257a 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -3780,7 +3780,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
if (maskable_probe_failure) {
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
- services_ocf_exitcode_str(rc), rsc->id, node->details->uname);
+ services_ocf_exitcode_str(old_rc), rsc->id, node->details->uname);
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
--
2.27.0

View File

@ -1,43 +0,0 @@
From 9d812b0401d4cedef53a3cc3653ec782a5c49e37 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 13 Jan 2022 10:42:02 -0600
Subject: [PATCH] Doc: fencer: improve pcmk_delay_base meta-data
Update its type, since its value can now be a node map as well as a string,
and add more detail to its description.
---
daemons/fenced/pacemaker-fenced.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
index 1b954be5a4..12f331496c 100644
--- a/daemons/fenced/pacemaker-fenced.c
+++ b/daemons/fenced/pacemaker-fenced.c
@@ -1548,13 +1548,17 @@ main(int argc, char **argv)
PCMK_STONITH_DELAY_BASE);
printf(" <shortdesc lang=\"en\">Enable a base delay for "
"fencing actions and specify base delay value.</shortdesc>\n");
- printf(" <longdesc lang=\"en\">This prevents double fencing when "
- "different delays are configured on the nodes.\nUse this to "
- "enable a static delay for fencing actions.\nThe overall delay "
- "is derived from a random delay value adding this static delay "
- "so that the sum is kept below the maximum delay.\nSet to eg. "
- "node1:1s;node2:5 to set different value per node.</longdesc>\n");
- printf(" <content type=\"time\" default=\"0s\"/>\n");
+ printf(" <longdesc lang=\"en\">This enables a static delay for "
+ "fencing actions, which can help avoid \"death matches\" where "
+ "two nodes try to fence each other at the same time. If "
+ PCMK_STONITH_DELAY_MAX " is also used, a random delay will be "
+ "added such that the total delay is kept below that value.\n"
+ "This can be set to a single time value to apply to any node "
+ "targeted by this device (useful if a separate device is "
+ "configured for each target), or to a node map (for example, "
+ "\"node1:1s;node2:5\") to set a different value per target.\n"
+ " </longdesc>\n");
+ printf(" <content type=\"string\" default=\"0s\"/>\n");
printf(" </parameter>\n");
printf(" <parameter name=\"%s\" unique=\"0\">\n",
--
2.27.0

File diff suppressed because it is too large Load Diff

View File

@ -1,56 +0,0 @@
From e330568504ec379ea42460d21a2e20b1652d9445 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Fri, 14 Jan 2022 01:35:35 -0800
Subject: [PATCH] Fix: fencing: Don't set stonith action to pending if fork
fails
Currently, we set a stonith action to pending if
services_action_async_fork_notify() returns true. However, "true" means
that the svc_action should not be freed. This might be because the
svc_action forked successfully and is pending, or it might be because
the svc_action has already been freed.
In the case of stonith actions, if we fail to fork, the stonith_action_t
object stored in svc_action->cb_data gets freed by the done callback,
and services_action_async_fork_notify() returns true. If we try to set
the action to pending, it causes a segfault.
This commit moves the "set to pending" step to the
stonith_action_async_forked() callback. We avoid the segfault and only
set it to pending if it's actually pending.
A slight difference in ordering was required to achieve this. Now, the
action gets set to pending immediately before being added to the
mainloop, instead of immediately after.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
lib/fencing/st_actions.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
index e4e43225cd..306001af69 100644
--- a/lib/fencing/st_actions.c
+++ b/lib/fencing/st_actions.c
@@ -550,6 +550,9 @@ stonith_action_async_forked(svc_action_t *svc_action)
(action->fork_cb) (svc_action->pid, action->userdata);
}
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING,
+ NULL);
+
crm_trace("Child process %d performing action '%s' successfully forked",
action->pid, action->action);
}
@@ -619,8 +622,6 @@ internal_stonith_action_execute(stonith_action_t * action)
if (services_action_async_fork_notify(svc_action,
&stonith_action_async_done,
&stonith_action_async_forked)) {
- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
- PCMK_EXEC_PENDING, NULL);
return pcmk_ok;
}
--
2.27.0

View File

@ -1,875 +0,0 @@
From 523f62eb235836a01ea039c23ada261a494f7b32 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 10 Nov 2021 15:22:47 -0600
Subject: [PATCH 01/11] Feature: libpacemaker: improve result for high-level
fencing API
Previously, pcmk__fencing_action()'s helpers for asynchronous fencing actions
initialized the result to a generic error, and then overrode that only on
success.
Now, set a detailed result for early failures, and use the full result when
available from the fencing API.
A standard return code is still returned to callers at this point.
---
lib/pacemaker/pcmk_fence.c | 31 ++++++++++++++++++-------------
1 file changed, 18 insertions(+), 13 deletions(-)
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index 7d6acd0de6..125e1b268b 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -32,8 +32,8 @@ static struct {
unsigned int timeout;
unsigned int tolerance;
int delay;
- int rc;
-} async_fence_data;
+ pcmk__action_result_t result;
+} async_fence_data = { NULL, };
static int
handle_level(stonith_t *st, char *target, int fence_level,
@@ -76,14 +76,13 @@ handle_level(stonith_t *st, char *target, int fence_level,
static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
- if (e->result != pcmk_ok) {
- return;
- }
+ if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
- if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) &&
- pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
-
- async_fence_data.rc = e->result;
+ pcmk__set_result(&async_fence_data.result,
+ stonith__event_exit_status(e),
+ stonith__event_execution_status(e),
+ stonith__event_exit_reason(e));
g_main_loop_quit(mainloop);
}
}
@@ -91,8 +90,9 @@ notify_callback(stonith_t * st, stonith_event_t * e)
static void
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
- async_fence_data.rc = data->rc;
-
+ pcmk__set_result(&async_fence_data.result, stonith__exit_status(data),
+ stonith__execution_status(data),
+ stonith__exit_reason(data));
g_main_loop_quit(mainloop);
}
@@ -106,6 +106,8 @@ async_fence_helper(gpointer user_data)
if (rc != pcmk_ok) {
fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
g_main_loop_quit(mainloop);
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
+ PCMK_EXEC_NOT_CONNECTED, NULL);
return TRUE;
}
@@ -121,6 +123,8 @@ async_fence_helper(gpointer user_data)
if (call_id < 0) {
g_main_loop_quit(mainloop);
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
+ PCMK_EXEC_ERROR, pcmk_strerror(call_id));
return TRUE;
}
@@ -146,7 +150,8 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
async_fence_data.timeout = timeout;
async_fence_data.tolerance = tolerance;
async_fence_data.delay = delay;
- async_fence_data.rc = pcmk_err_generic;
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
+ NULL);
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
mainloop_set_trigger(trig);
@@ -156,7 +161,7 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
free(async_fence_data.name);
- return pcmk_legacy2rc(async_fence_data.rc);
+ return stonith__result2rc(&async_fence_data.result);
}
#ifdef BUILD_PUBLIC_LIBPACEMAKER
--
2.27.0
From 008868fae5d1b0d6d8dc61f7acfb3856801ddd52 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 15:36:10 -0600
Subject: [PATCH 02/11] Refactor: libpacemaker: add exit reason to high-level
fencing API
Nothing uses it as of this commit
---
include/pacemaker.h | 5 ++++-
include/pcmki/pcmki_fence.h | 5 ++++-
lib/pacemaker/pcmk_fence.c | 10 +++++++---
tools/stonith_admin.c | 6 +++---
4 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/pacemaker.h b/include/pacemaker.h
index a8523c969e..0daa4c5945 100644
--- a/include/pacemaker.h
+++ b/include/pacemaker.h
@@ -189,12 +189,15 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
* again.
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
* static/random fencing delays from pcmk_delay_base/max.
+ * \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
+ * returned value.
*/
int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout, unsigned int tolerance,
- int delay);
+ int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node.
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
index d4cef68f5c..c3da0361d7 100644
--- a/include/pcmki/pcmki_fence.h
+++ b/include/pcmki/pcmki_fence.h
@@ -28,12 +28,15 @@
* again.
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
* static/random fencing delays from pcmk_delay_base/max
+ * \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
+ * returned value.
*/
int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout, unsigned int tolerance,
- int delay);
+ int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node.
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index 125e1b268b..dbf084fb6b 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -139,7 +139,7 @@ async_fence_helper(gpointer user_data)
int
pcmk__fence_action(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout, unsigned int tolerance,
- int delay)
+ int delay, char **reason)
{
crm_trigger_t *trig;
@@ -161,6 +161,9 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
free(async_fence_data.name);
+ if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
+ *reason = strdup(async_fence_data.result.exit_reason);
+ }
return stonith__result2rc(&async_fence_data.result);
}
@@ -168,9 +171,10 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
int
pcmk_fence_action(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout, unsigned int tolerance,
- int delay)
+ int delay, char **reason)
{
- return pcmk__fence_action(st, target, action, name, timeout, tolerance, delay);
+ return pcmk__fence_action(st, target, action, name, timeout, tolerance,
+ delay, reason);
}
#endif
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index 2d48326e1b..fdc7c46d49 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -571,17 +571,17 @@ main(int argc, char **argv)
case 'B':
rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
- options.tolerance*1000, options.delay);
+ options.tolerance*1000, options.delay, NULL);
break;
case 'F':
rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
- options.tolerance*1000, options.delay);
+ options.tolerance*1000, options.delay, NULL);
break;
case 'U':
rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
- options.tolerance*1000, options.delay);
+ options.tolerance*1000, options.delay, NULL);
break;
case 'h':
--
2.27.0
From 7570510f9985ba75ef73fb824f28109e135ace0a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 15:40:48 -0600
Subject: [PATCH 03/11] Refactor: libpacemaker: rename high-level fencing API
Rename pcmk_fence_action() to pcmk_request_fencing(), and its internal
equivalent pcmk__fence_action() to pcmk__request_fencing(). The change is
backward-compatible because pcmk_fence_action() has not been exposed publicly
yet.
"Fence action" can be easily confused with libcrmservice actions, liblrmd
actions, libstonithd actions, scheduler actions, and so forth.
Also, the new name makes it clearer that the caller is requesting that the
cluster perform fencing, and not directly performing fencing.
---
include/pacemaker.h | 20 ++++++++++----------
include/pcmki/pcmki_fence.h | 16 ++++++++--------
lib/pacemaker/pcmk_fence.c | 16 ++++++++--------
tools/stonith_admin.c | 18 ++++++++++++------
4 files changed, 38 insertions(+), 32 deletions(-)
diff --git a/include/pacemaker.h b/include/pacemaker.h
index 0daa4c5945..e581f975a9 100644
--- a/include/pacemaker.h
+++ b/include/pacemaker.h
@@ -177,27 +177,27 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
#ifdef BUILD_PUBLIC_LIBPACEMAKER
/*!
- * \brief Perform a STONITH action.
+ * \brief Ask the cluster to perform fencing
*
- * \param[in] st A connection to the STONITH API.
- * \param[in] target The node receiving the action.
- * \param[in] action The action to perform.
+ * \param[in] st A connection to the fencer API
+ * \param[in] target The node that should be fenced
+ * \param[in] action The fencing action (on, off, reboot) to perform
* \param[in] name Who requested the fence action?
- * \param[in] timeout How long to wait for the operation to complete (in ms).
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
* \param[in] tolerance If a successful action for \p target happened within
* this many ms, return 0 without performing the action
- * again.
+ * again
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
- * static/random fencing delays from pcmk_delay_base/max.
+ * static/random fencing delays from pcmk_delay_base/max
* \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
* \note If \p reason is not NULL, the caller is responsible for freeing its
* returned value.
*/
-int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
- const char *name, unsigned int timeout, unsigned int tolerance,
- int delay, char **reason);
+int pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node.
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
index c3da0361d7..e3a7e27264 100644
--- a/include/pcmki/pcmki_fence.h
+++ b/include/pcmki/pcmki_fence.h
@@ -13,14 +13,14 @@
# include <crm/common/output_internal.h>
/*!
- * \brief Perform a STONITH action.
+ * \brief Ask the cluster to perform fencing
*
- * \note This is the internal version of pcmk_fence_action(). External users
+ * \note This is the internal version of pcmk_request_fencing(). External users
* of the pacemaker API should use that function instead.
*
- * \param[in] st A connection to the STONITH API.
- * \param[in] target The node receiving the action.
- * \param[in] action The action to perform.
+ * \param[in] st A connection to the fencer API
+ * \param[in] target The node that should be fenced
+ * \param[in] action The fencing action (on, off, reboot) to perform
* \param[in] name Who requested the fence action?
* \param[in] timeout How long to wait for the operation to complete (in ms).
* \param[in] tolerance If a successful action for \p target happened within
@@ -34,9 +34,9 @@
* \note If \p reason is not NULL, the caller is responsible for freeing its
* returned value.
*/
-int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
- const char *name, unsigned int timeout, unsigned int tolerance,
- int delay, char **reason);
+int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason);
/*!
* \brief List the fencing operations that have occurred for a specific node.
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index dbf084fb6b..1b7feb54b2 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -137,9 +137,9 @@ async_fence_helper(gpointer user_data)
}
int
-pcmk__fence_action(stonith_t *st, const char *target, const char *action,
- const char *name, unsigned int timeout, unsigned int tolerance,
- int delay, char **reason)
+pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason)
{
crm_trigger_t *trig;
@@ -169,12 +169,12 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
#ifdef BUILD_PUBLIC_LIBPACEMAKER
int
-pcmk_fence_action(stonith_t *st, const char *target, const char *action,
- const char *name, unsigned int timeout, unsigned int tolerance,
- int delay, char **reason)
+pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
+ const char *name, unsigned int timeout,
+ unsigned int tolerance, int delay, char **reason)
{
- return pcmk__fence_action(st, target, action, name, timeout, tolerance,
- delay, reason);
+ return pcmk__request_fencing(st, target, action, name, timeout, tolerance,
+ delay, reason);
}
#endif
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index fdc7c46d49..56948b3875 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -570,18 +570,24 @@ main(int argc, char **argv)
break;
case 'B':
- rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
- options.tolerance*1000, options.delay, NULL);
+ rc = pcmk__request_fencing(st, target, "reboot", name,
+ options.timeout * 1000,
+ options.tolerance * 1000,
+ options.delay, NULL);
break;
case 'F':
- rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
- options.tolerance*1000, options.delay, NULL);
+ rc = pcmk__request_fencing(st, target, "off", name,
+ options.timeout * 1000,
+ options.tolerance * 1000,
+ options.delay, NULL);
break;
case 'U':
- rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
- options.tolerance*1000, options.delay, NULL);
+ rc = pcmk__request_fencing(st, target, "on", name,
+ options.timeout * 1000,
+ options.tolerance * 1000,
+ options.delay, NULL);
break;
case 'h':
--
2.27.0
From 247eb303df934944c0b72b162bb661cee6e0ed8b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 15:52:37 -0600
Subject: [PATCH 04/11] Refactor: tools: drop unnecessary string duplication in
stonith_admin
---
tools/stonith_admin.c | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index 56948b3875..c11e302e76 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -360,8 +360,6 @@ main(int argc, char **argv)
pcmk__cli_init_logging("stonith_admin", args->verbosity);
- name = strdup(crm_system_name);
-
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
@@ -496,7 +494,7 @@ main(int argc, char **argv)
if (st == NULL) {
rc = -ENOMEM;
} else if (!no_connect) {
- rc = st->cmds->connect(st, name, NULL);
+ rc = st->cmds->connect(st, crm_system_name, NULL);
}
if (rc < 0) {
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
@@ -570,21 +568,21 @@ main(int argc, char **argv)
break;
case 'B':
- rc = pcmk__request_fencing(st, target, "reboot", name,
+ rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
options.timeout * 1000,
options.tolerance * 1000,
options.delay, NULL);
break;
case 'F':
- rc = pcmk__request_fencing(st, target, "off", name,
+ rc = pcmk__request_fencing(st, target, "off", crm_system_name,
options.timeout * 1000,
options.tolerance * 1000,
options.delay, NULL);
break;
case 'U':
- rc = pcmk__request_fencing(st, target, "on", name,
+ rc = pcmk__request_fencing(st, target, "on", crm_system_name,
options.timeout * 1000,
options.tolerance * 1000,
options.delay, NULL);
@@ -619,7 +617,6 @@ main(int argc, char **argv)
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
- free(name);
stonith_key_value_freeall(options.params, 1, 1);
if (st != NULL) {
--
2.27.0
From a7888bf6868d8d9d9c77f65ae9983cf748bb0548 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 15:56:34 -0600
Subject: [PATCH 05/11] Refactor: tools: functionize requesting fencing in
stonith_admin
... to reduce code duplication and improve readability
---
tools/stonith_admin.c | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index c11e302e76..f738a9c888 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -331,6 +331,18 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
return context;
}
+// \return Standard Pacemaker return code
+static int
+request_fencing(stonith_t *st, const char *target, const char *command)
+{
+ int rc = pcmk__request_fencing(st, target, command, crm_system_name,
+ options.timeout * 1000,
+ options.tolerance * 1000,
+ options.delay, NULL);
+
+ return rc;
+}
+
int
main(int argc, char **argv)
{
@@ -568,24 +580,15 @@ main(int argc, char **argv)
break;
case 'B':
- rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
- options.timeout * 1000,
- options.tolerance * 1000,
- options.delay, NULL);
+ rc = request_fencing(st, target, "reboot");
break;
case 'F':
- rc = pcmk__request_fencing(st, target, "off", crm_system_name,
- options.timeout * 1000,
- options.tolerance * 1000,
- options.delay, NULL);
+ rc = request_fencing(st, target, "off");
break;
case 'U':
- rc = pcmk__request_fencing(st, target, "on", crm_system_name,
- options.timeout * 1000,
- options.tolerance * 1000,
- options.delay, NULL);
+ rc = request_fencing(st, target, "on");
break;
case 'h':
--
2.27.0
From 2da32df780983ec1197e857eed5eeb5bf1101889 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 16:05:19 -0600
Subject: [PATCH 06/11] Feature: tools: display failure reasons for
stonith_admin fencing commands
Previously, stonith_admin's --fence/--unfence/--reboot options did not output
any error message on failure. Now, they do, including the exit reason, if
available.
---
tools/stonith_admin.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index f738a9c888..5590faf11e 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -333,13 +333,33 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
// \return Standard Pacemaker return code
static int
-request_fencing(stonith_t *st, const char *target, const char *command)
+request_fencing(stonith_t *st, const char *target, const char *command,
+ GError **error)
{
+ char *reason = NULL;
int rc = pcmk__request_fencing(st, target, command, crm_system_name,
options.timeout * 1000,
options.tolerance * 1000,
- options.delay, NULL);
+ options.delay, &reason);
+ if (rc != pcmk_rc_ok) {
+ const char *rc_str = pcmk_rc_str(rc);
+
+ // If reason is identical to return code string, don't display it twice
+ if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) {
+ free(reason);
+ reason = NULL;
+ }
+
+ g_set_error(error, PCMK__RC_ERROR, rc,
+ "Couldn't %sfence %s: %s%s%s%s",
+ ((strcmp(command, "on") == 0)? "un" : ""),
+ target, pcmk_rc_str(rc),
+ ((reason == NULL)? "" : " ("),
+ ((reason == NULL)? "" : reason),
+ ((reason == NULL)? "" : ")"));
+ }
+ free(reason);
return rc;
}
@@ -580,15 +600,15 @@ main(int argc, char **argv)
break;
case 'B':
- rc = request_fencing(st, target, "reboot");
+ rc = request_fencing(st, target, "reboot", &error);
break;
case 'F':
- rc = request_fencing(st, target, "off");
+ rc = request_fencing(st, target, "off", &error);
break;
case 'U':
- rc = request_fencing(st, target, "on");
+ rc = request_fencing(st, target, "on", &error);
break;
case 'h':
--
2.27.0
From 2d99eba4c326d3b13dbbe446971ea5febd5d05be Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 10 Dec 2021 16:08:49 -0600
Subject: [PATCH 07/11] Feature: libpacemaker: return exit reason for fencer
connection failures
... instead of outputting to stderr directly, so that the caller (i.e.
stonith_admin) can output the error in the correct output format.
---
lib/pacemaker/pcmk_fence.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index 1b7feb54b2..d17b07cda2 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -104,10 +104,9 @@ async_fence_helper(gpointer user_data)
int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
if (rc != pcmk_ok) {
- fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
g_main_loop_quit(mainloop);
pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
- PCMK_EXEC_NOT_CONNECTED, NULL);
+ PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
return TRUE;
}
--
2.27.0
From 4480ef0602f47450bdddfbde360a6a8327710927 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 17 Jan 2022 09:39:39 -0600
Subject: [PATCH 08/11] Low: libpacemaker: compare fence action names
case-sensitively
---
lib/pacemaker/pcmk_fence.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index d17b07cda2..2a8f50a555 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -77,7 +77,7 @@ static void
notify_callback(stonith_t * st, stonith_event_t * e)
{
if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
- && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
pcmk__set_result(&async_fence_data.result,
stonith__event_exit_status(e),
@@ -549,7 +549,7 @@ pcmk__reduce_fence_history(stonith_history_t *history)
if ((hp->state == st_done) || (hp->state == st_failed)) {
/* action not in progress */
if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) &&
- pcmk__str_eq(hp->action, np->action, pcmk__str_casei) &&
+ pcmk__str_eq(hp->action, np->action, pcmk__str_none) &&
(hp->state == np->state) &&
((hp->state == st_done) ||
pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) {
--
2.27.0
From fe4c65a3b9e715c2b535709f989f2369d3637b78 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 17 Jan 2022 09:45:24 -0600
Subject: [PATCH 09/11] Refactor: libpacemaker: avoid unnecessary string
duplication
... and don't leave any dynamic memory hanging around
---
lib/pacemaker/pcmk_fence.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
index 2a8f50a555..260fa5ab8e 100644
--- a/lib/pacemaker/pcmk_fence.c
+++ b/lib/pacemaker/pcmk_fence.c
@@ -141,6 +141,7 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
unsigned int tolerance, int delay, char **reason)
{
crm_trigger_t *trig;
+ int rc = pcmk_rc_ok;
async_fence_data.st = st;
async_fence_data.name = strdup(name);
@@ -160,10 +161,14 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
free(async_fence_data.name);
- if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
- *reason = strdup(async_fence_data.result.exit_reason);
+ if (reason != NULL) {
+ // Give the caller ownership of the exit reason
+ *reason = async_fence_data.result.exit_reason;
+ async_fence_data.result.exit_reason = NULL;
}
- return stonith__result2rc(&async_fence_data.result);
+ rc = stonith__result2rc(&async_fence_data.result);
+ pcmk__reset_result(&async_fence_data.result);
+ return rc;
}
#ifdef BUILD_PUBLIC_LIBPACEMAKER
--
2.27.0
From 7b7af07796f05a1adabdac655582be2e17106f81 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 17 Jan 2022 10:07:10 -0600
Subject: [PATCH 10/11] Doc: libpacemaker: improve pcmk__request_fencing()
doxygen block
---
include/pacemaker.h | 6 ++++--
include/pcmki/pcmki_fence.h | 15 +++++++++------
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/include/pacemaker.h b/include/pacemaker.h
index e581f975a9..266a844892 100644
--- a/include/pacemaker.h
+++ b/include/pacemaker.h
@@ -187,8 +187,10 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
* \param[in] tolerance If a successful action for \p target happened within
* this many ms, return 0 without performing the action
* again
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
- * static/random fencing delays from pcmk_delay_base/max
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
+ * fencing action (a value of -1 applies no delay and also
+ * disables any fencing delay from pcmk_delay_base and
+ * pcmk_delay_max)
* \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
index e3a7e27264..4a2fe3c481 100644
--- a/include/pcmki/pcmki_fence.h
+++ b/include/pcmki/pcmki_fence.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2019-2021 the Pacemaker project contributors
+ * Copyright 2019-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -22,17 +22,20 @@
* \param[in] target The node that should be fenced
* \param[in] action The fencing action (on, off, reboot) to perform
* \param[in] name Who requested the fence action?
- * \param[in] timeout How long to wait for the operation to complete (in ms).
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
* \param[in] tolerance If a successful action for \p target happened within
- * this many ms, return 0 without performing the action
- * again.
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
- * static/random fencing delays from pcmk_delay_base/max
+ * this many milliseconds, return success without
+ * performing the action again
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
+ * fencing action (a value of -1 applies no delay and also
+ * disables any fencing delay from pcmk_delay_base and
+ * pcmk_delay_max)
* \param[out] reason If not NULL, where to put descriptive failure reason
*
* \return Standard Pacemaker return code
* \note If \p reason is not NULL, the caller is responsible for freeing its
* returned value.
+ * \todo delay is eventually used with g_timeout_add() and should be guint
*/
int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
const char *name, unsigned int timeout,
--
2.27.0
From 61fb7271712e1246eb6d9472dc1afc7cd10e0a79 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 17 Jan 2022 10:18:02 -0600
Subject: [PATCH 11/11] Fix: tools: get stonith_admin -T option working again
Regression introduced in 2.0.3 by 3910b6fec
This reverts commit 247eb303df934944c0b72b162bb661cee6e0ed8b
("Refactor: tools: drop unnecessary string duplication in stonith_admin")
and fixes a regression introduced when stonith_admin was converted to use
GOption.
The -T option is intended to override the client name passed to the fencer API,
but the client name was set to the default (crm_system_name) after option
processing had already been done, so any value for -T was overwritten by the
default, and its memory was leaked.
This commit sets the default only if -T was not used.
---
tools/stonith_admin.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index 5590faf11e..54774b6fee 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -337,10 +337,10 @@ request_fencing(stonith_t *st, const char *target, const char *command,
GError **error)
{
char *reason = NULL;
- int rc = pcmk__request_fencing(st, target, command, crm_system_name,
- options.timeout * 1000,
- options.tolerance * 1000,
- options.delay, &reason);
+ int rc = pcmk__request_fencing(st, target, command, name,
+ options.timeout * 1000,
+ options.tolerance * 1000,
+ options.delay, &reason);
if (rc != pcmk_rc_ok) {
const char *rc_str = pcmk_rc_str(rc);
@@ -392,6 +392,10 @@ main(int argc, char **argv)
pcmk__cli_init_logging("stonith_admin", args->verbosity);
+ if (name == NULL) {
+ name = strdup(crm_system_name);
+ }
+
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
if (rc != pcmk_rc_ok) {
exit_code = CRM_EX_ERROR;
@@ -526,7 +530,7 @@ main(int argc, char **argv)
if (st == NULL) {
rc = -ENOMEM;
} else if (!no_connect) {
- rc = st->cmds->connect(st, crm_system_name, NULL);
+ rc = st->cmds->connect(st, name, NULL);
}
if (rc < 0) {
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
@@ -640,6 +644,7 @@ main(int argc, char **argv)
out->finish(out, exit_code, true, NULL);
pcmk__output_free(out);
}
+ free(name);
stonith_key_value_freeall(options.params, 1, 1);
if (st != NULL) {
--
2.27.0

View File

@ -1,796 +0,0 @@
From 08c3420f2c857e7b27cd960f355d787af534da7d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 18 Jan 2022 16:04:49 -0600
Subject: [PATCH 01/12] Log: libcrmcommon: improve description for "not
connected" status
PCMK_EXEC_NOT_CONNECTED was originally added to represent "No executor
connection", but it can also now mean no fencer connection, so change it to
"Internal communication failure" which is probably less mysterious to end users
anyway (especially since it should be accompanied by a more descriptive exit
reason).
---
include/crm/common/results.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
index 873faf5c43..3d322a7ce6 100644
--- a/include/crm/common/results.h
+++ b/include/crm/common/results.h
@@ -349,7 +349,7 @@ pcmk_exec_status_str(enum pcmk_exec_status status)
case PCMK_EXEC_ERROR_HARD: return "Hard error";
case PCMK_EXEC_ERROR_FATAL: return "Fatal error";
case PCMK_EXEC_NOT_INSTALLED: return "Not installed";
- case PCMK_EXEC_NOT_CONNECTED: return "No executor connection";
+ case PCMK_EXEC_NOT_CONNECTED: return "Internal communication failure";
case PCMK_EXEC_INVALID: return "Cannot execute now";
case PCMK_EXEC_NO_FENCE_DEVICE: return "No fence device";
case PCMK_EXEC_NO_SECRETS: return "CIB secrets unavailable";
--
2.27.0
From 7c345cf8cf0cb054f5634206880df035bfef7311 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 15:12:36 -0600
Subject: [PATCH 02/12] Refactor: libcrmcommon: drop unnecessary system error
redefinitions
portability.h defines some system error codes that might not be present on
non-Linux systems.
This was a bad idea, since there's no way to ensure the defined values don't
conflict with existing system codes. However, we use a number of them, so it's
probably best to keep them, at least until we can make a backward compatibility
break.
However, we don't use EUNATCH, ENOSR, or ENOSTR, so we can delete those.
---
include/portability.h | 12 ------------
lib/common/results.c | 9 ++++++---
2 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/include/portability.h b/include/portability.h
index 9a60c583a7..ee065a376d 100644
--- a/include/portability.h
+++ b/include/portability.h
@@ -131,10 +131,6 @@ typedef union
# define EREMOTEIO 193
# endif
-# ifndef EUNATCH
-# define EUNATCH 194
-# endif
-
# ifndef ENOKEY
# define ENOKEY 195
# endif
@@ -147,14 +143,6 @@ typedef union
# define ETIME 197
# endif
-# ifndef ENOSR
-# define ENOSR 198
-# endif
-
-# ifndef ENOSTR
-# define ENOSTR 199
-# endif
-
# ifndef EKEYREJECTED
# define EKEYREJECTED 200
# endif
diff --git a/lib/common/results.c b/lib/common/results.c
index 6d120694cd..96cd4e5659 100644
--- a/lib/common/results.c
+++ b/lib/common/results.c
@@ -118,9 +118,6 @@ pcmk_strerror(int rc)
case EREMOTEIO:
return "Remote I/O error";
/* coverity[dead_error_condition] False positive on non-Linux */
- case EUNATCH:
- return "Protocol driver not attached";
- /* coverity[dead_error_condition] False positive on non-Linux */
case ENOKEY:
return "Required key not available";
}
@@ -342,8 +339,12 @@ pcmk_rc_name(int rc)
case ENOMSG: return "ENOMSG";
case ENOPROTOOPT: return "ENOPROTOOPT";
case ENOSPC: return "ENOSPC";
+#ifdef ENOSR
case ENOSR: return "ENOSR";
+#endif
+#ifdef ENOSTR
case ENOSTR: return "ENOSTR";
+#endif
case ENOSYS: return "ENOSYS";
case ENOTBLK: return "ENOTBLK";
case ENOTCONN: return "ENOTCONN";
@@ -376,7 +377,9 @@ pcmk_rc_name(int rc)
case ETIME: return "ETIME";
case ETIMEDOUT: return "ETIMEDOUT";
case ETXTBSY: return "ETXTBSY";
+#ifdef EUNATCH
case EUNATCH: return "EUNATCH";
+#endif
case EUSERS: return "EUSERS";
/* case EWOULDBLOCK: return "EWOULDBLOCK"; */
case EXDEV: return "EXDEV";
--
2.27.0
From eac8d1ca51eac3f437e18584f7e013d976ecee2c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 15:33:12 -0600
Subject: [PATCH 03/12] Log: libcrmcommon: improve handling of portability.h
error codes
portability.h defines some system error codes that might not be present on
non-Linux systems.
Define a constant for each one (for example, PCMK__ECOMM for ECOMM) when
the system doesn't have the value, so we can detect that when relevant.
Also, make sure pcmk_rc_name() and pcmk_rc_str() handle all of these values.
---
include/portability.h | 8 ++++++++
lib/common/results.c | 32 ++++++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/include/portability.h b/include/portability.h
index ee065a376d..5d5fbf21cb 100644
--- a/include/portability.h
+++ b/include/portability.h
@@ -116,34 +116,42 @@ typedef union
# include <errno.h>
# ifndef ENOTUNIQ
+# define PCMK__ENOTUNIQ
# define ENOTUNIQ 190
# endif
# ifndef ECOMM
+# define PCMK__ECOMM
# define ECOMM 191
# endif
# ifndef ELIBACC
+# define PCMK__ELIBACC
# define ELIBACC 192
# endif
# ifndef EREMOTEIO
+# define PCMK__EREMOTEIO
# define EREMOTEIO 193
# endif
# ifndef ENOKEY
+# define PCMK__ENOKEY
# define ENOKEY 195
# endif
# ifndef ENODATA
+# define PCMK__ENODATA
# define ENODATA 196
# endif
# ifndef ETIME
+# define PCMK__ETIME
# define ETIME 197
# endif
# ifndef EKEYREJECTED
+# define PCMK__EKEYREJECTED
# define EKEYREJECTED 200
# endif
diff --git a/lib/common/results.c b/lib/common/results.c
index 96cd4e5659..bcf289d0d6 100644
--- a/lib/common/results.c
+++ b/lib/common/results.c
@@ -395,9 +395,9 @@ pcmk_rc_name(int rc)
#ifdef EISNAM // Not available on OS X, Illumos, Solaris
case EISNAM: return "EISNAM";
case EKEYEXPIRED: return "EKEYEXPIRED";
- case EKEYREJECTED: return "EKEYREJECTED";
case EKEYREVOKED: return "EKEYREVOKED";
#endif
+ case EKEYREJECTED: return "EKEYREJECTED";
case EL2HLT: return "EL2HLT";
case EL2NSYNC: return "EL2NSYNC";
case EL3HLT: return "EL3HLT";
@@ -443,7 +443,35 @@ pcmk_rc_str(int rc)
if (rc < 0) {
return "Unknown error";
}
- return strerror(rc);
+
+ // Handle values that could be defined by system or by portability.h
+ switch (rc) {
+#ifdef PCMK__ENOTUNIQ
+ case ENOTUNIQ: return "Name not unique on network";
+#endif
+#ifdef PCMK__ECOMM
+ case ECOMM: return "Communication error on send";
+#endif
+#ifdef PCMK__ELIBACC
+ case ELIBACC: return "Can not access a needed shared library";
+#endif
+#ifdef PCMK__EREMOTEIO
+ case EREMOTEIO: return "Remote I/O error";
+#endif
+#ifdef PCMK__ENOKEY
+ case ENOKEY: return "Required key not available";
+#endif
+#ifdef PCMK__ENODATA
+ case ENODATA: return "No data available";
+#endif
+#ifdef PCMK__ETIME
+ case ETIME: return "Timer expired";
+#endif
+#ifdef PCMK__EKEYREJECTED
+ case EKEYREJECTED: return "Key was rejected by service";
+#endif
+ default: return strerror(rc);
+ }
}
// This returns negative values for errors
--
2.27.0
From 32a38ac6374f85c43e7f4051f5e519822cc481e6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 15:39:19 -0600
Subject: [PATCH 04/12] Log: libcrmcommon: redefine pcmk_strerror() in terms of
pcmk_rc_str()
... to reduce code duplication. This causes minor differences in the string for
a few values.
---
lib/common/results.c | 67 +-------------------------------------------
1 file changed, 1 insertion(+), 66 deletions(-)
diff --git a/lib/common/results.c b/lib/common/results.c
index bcf289d0d6..b2c6e8d553 100644
--- a/lib/common/results.c
+++ b/lib/common/results.c
@@ -57,72 +57,7 @@ pcmk_errorname(int rc)
const char *
pcmk_strerror(int rc)
{
- if (rc == 0) {
- return "OK";
- }
-
- rc = abs(rc);
-
- // Of course rc > 0 ... unless someone passed INT_MIN as rc
- if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) {
- return strerror(rc);
- }
-
- switch (rc) {
- case pcmk_err_generic:
- return "Generic Pacemaker error";
- case pcmk_err_no_quorum:
- return "Operation requires quorum";
- case pcmk_err_schema_validation:
- return "Update does not conform to the configured schema";
- case pcmk_err_transform_failed:
- return "Schema transform failed";
- case pcmk_err_old_data:
- return "Update was older than existing configuration";
- case pcmk_err_diff_failed:
- return "Application of an update diff failed";
- case pcmk_err_diff_resync:
- return "Application of an update diff failed, requesting a full refresh";
- case pcmk_err_cib_modified:
- return "The on-disk configuration was manually modified";
- case pcmk_err_cib_backup:
- return "Could not archive the previous configuration";
- case pcmk_err_cib_save:
- return "Could not save the new configuration to disk";
- case pcmk_err_cib_corrupt:
- return "Could not parse on-disk configuration";
- case pcmk_err_multiple:
- return "Resource active on multiple nodes";
- case pcmk_err_node_unknown:
- return "Node not found";
- case pcmk_err_already:
- return "Situation already as requested";
- case pcmk_err_bad_nvpair:
- return "Bad name/value pair given";
- case pcmk_err_schema_unchanged:
- return "Schema is already the latest available";
- case pcmk_err_unknown_format:
- return "Unknown output format";
-
- /* The following cases will only be hit on systems for which they are non-standard */
- /* coverity[dead_error_condition] False positive on non-Linux */
- case ENOTUNIQ:
- return "Name not unique on network";
- /* coverity[dead_error_condition] False positive on non-Linux */
- case ECOMM:
- return "Communication error on send";
- /* coverity[dead_error_condition] False positive on non-Linux */
- case ELIBACC:
- return "Can not access a needed shared library";
- /* coverity[dead_error_condition] False positive on non-Linux */
- case EREMOTEIO:
- return "Remote I/O error";
- /* coverity[dead_error_condition] False positive on non-Linux */
- case ENOKEY:
- return "Required key not available";
- }
- crm_err("Unknown error code: %d", rc);
- return "Unknown error";
+ return pcmk_rc_str(pcmk_legacy2rc(rc));
}
// Standard Pacemaker API return codes
--
2.27.0
From 7c331d7e2275ffebbfd5e2f6432a6137a66ee5db Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 15:41:24 -0600
Subject: [PATCH 05/12] Log: libcrmcommon: don't say "Unknown error"
... which is unhelpful and annoying to users
---
lib/common/results.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/common/results.c b/lib/common/results.c
index b2c6e8d553..5ffac76549 100644
--- a/lib/common/results.c
+++ b/lib/common/results.c
@@ -376,7 +376,7 @@ pcmk_rc_str(int rc)
return pcmk__rcs[pcmk_rc_error - rc].desc;
}
if (rc < 0) {
- return "Unknown error";
+ return "Error";
}
// Handle values that could be defined by system or by portability.h
@@ -768,7 +768,7 @@ bz2_strerror(int rc)
case BZ_OUTBUFF_FULL:
return "output data will not fit into the buffer provided";
}
- return "Unknown error";
+ return "Data compression error";
}
crm_exit_t
--
2.27.0
From 26883b4edda7d81bfcb79bd7b33bb3210beff110 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 16:01:39 -0600
Subject: [PATCH 06/12] Log: fencing: don't warn if cluster has no watchdog
device
---
lib/fencing/st_client.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index b1de912b2a..a0f3119f3b 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -187,7 +187,12 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
* we drop in here - so as not to make remote nodes
* panic on that answer
*/
- crm_warn("watchdog-fencing-query failed");
+ if (rc == -ENODEV) {
+ crm_notice("Cluster does not have watchdog fencing device");
+ } else {
+ crm_warn("Could not check for watchdog fencing device: %s",
+ pcmk_strerror(rc));
+ }
} else if (list[0] == '\0') {
rv = TRUE;
} else {
--
2.27.0
From 72b3c42232deaca64ffba9582598c59331203761 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 20 Dec 2021 16:22:49 -0600
Subject: [PATCH 07/12] Test: libcrmcommon: update pcmk_rc_str() unit test for
recent change
---
lib/common/tests/results/pcmk__results_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/common/tests/results/pcmk__results_test.c b/lib/common/tests/results/pcmk__results_test.c
index 57a520c501..e08d4b6261 100644
--- a/lib/common/tests/results/pcmk__results_test.c
+++ b/lib/common/tests/results/pcmk__results_test.c
@@ -30,7 +30,7 @@ static void
test_for_pcmk_rc_str(void **state) {
assert_string_equal(pcmk_rc_str(pcmk_rc_error-1), "Unknown output format");
assert_string_equal(pcmk_rc_str(pcmk_rc_ok), "OK");
- assert_string_equal(pcmk_rc_str(-1), "Unknown error");
+ assert_string_equal(pcmk_rc_str(-1), "Error");
}
static void
--
2.27.0
From c1ad3d6640f695321a83183c95fae2f105adc429 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Dec 2021 10:20:38 -0600
Subject: [PATCH 08/12] Test: cts-lab: update expected patterns for recent
changes
---
cts/lab/CTStests.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
index 62c832eb45..f4be998cfb 100644
--- a/cts/lab/CTStests.py
+++ b/cts/lab/CTStests.py
@@ -3055,7 +3055,7 @@ class RemoteStonithd(RemoteDriver):
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
- r"error: Result of monitor operation for .* on remote-.*: No executor connection",
+ r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
]
ignore_pats.extend(RemoteDriver.errorstoignore(self))
--
2.27.0
From f272e2f526633c707e894b39c7c7bce3c14de898 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Dec 2021 15:40:49 -0600
Subject: [PATCH 09/12] Log: controller,libpacemaker: make history XML creation
less chatty
Other messages with the same info will already be logged at higher severity
---
daemons/controld/controld_execd.c | 3 +--
daemons/controld/controld_te_actions.c | 7 ++-----
include/pcmki/pcmki_sched_utils.h | 3 +--
lib/pacemaker/pcmk_injections.c | 3 +--
lib/pacemaker/pcmk_sched_actions.c | 12 +++++-------
5 files changed, 10 insertions(+), 18 deletions(-)
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 15784e7687..52157fa5d4 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -693,9 +693,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
caller_version = CRM_FEATURE_SET;
}
- crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
- fsa_our_uname, src, LOG_DEBUG);
+ fsa_our_uname, src);
if (xml_op == NULL) {
return TRUE;
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index 63b7c72359..b0bcb8b2e4 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -181,7 +181,6 @@ controld_record_action_timeout(crm_action_t *action)
lrmd_event_data_t *op = NULL;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
- xmlNode *xml_op = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
@@ -245,12 +244,10 @@ controld_record_action_timeout(crm_action_t *action)
op->user_data = pcmk__transition_key(transition_graph->id, action->id,
target_rc, te_uuid);
- xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
- target, __func__, LOG_INFO);
+ pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
+ __func__);
lrmd_free_event(op);
- crm_log_xml_trace(xml_op, "Action timeout");
-
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
free_xml(state);
diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
index 68d60fc7db..144424a609 100644
--- a/include/pcmki/pcmki_sched_utils.h
+++ b/include/pcmki/pcmki_sched_utils.h
@@ -52,8 +52,7 @@ extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_wor
xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
const char *caller_version, int target_rc,
- const char *node, const char *origin,
- int level);
+ const char *node, const char *origin);
# define LOAD_STOPPED "load_stopped"
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
index 678c3f5dd2..1aa90a5a0b 100644
--- a/lib/pacemaker/pcmk_sched_transition.c
+++ b/lib/pacemaker/pcmk_sched_transition.c
@@ -201,8 +201,7 @@ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
{
return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
- target_rc, NULL, crm_system_name,
- LOG_TRACE);
+ target_rc, NULL, crm_system_name);
}
static xmlNode *
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index f8200b0efc..4f63d3374d 100644
--- a/lib/pacemaker/pcmk_sched_utils.c
+++ b/lib/pacemaker/pcmk_sched_utils.c
@@ -892,14 +892,13 @@ add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
- * \param[in] level A log message will be logged at this level
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
- const char *node, const char *origin, int level)
+ const char *node, const char *origin)
{
char *key = NULL;
char *magic = NULL;
@@ -912,11 +911,10 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
- do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
- origin, op->rsc_id, op->op_type,
- pcmk_exec_status_str(op->op_status), op->interval_ms);
-
- crm_trace("DC version: %s", caller_version);
+ crm_trace("Creating history XML for %s-interval %s action for %s on %s "
+ "(DC version: %s, origin: %s)",
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
+ ((node == NULL)? "no node" : node), caller_version, origin);
task = op->op_type;
--
2.27.0
From 06b1da9e5345e0d1571042c11646fd7157961279 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Dec 2021 17:09:44 -0600
Subject: [PATCH 10/12] Feature: controller: improve exit reason for internal
timeouts
Functionize the part of controld_record_action_timeout() that creates a fake
executor event, into a new function synthesize_timeout_event(), and have it set
a more detailed exit reason describing what timed out.
---
daemons/controld/controld_te_actions.c | 61 ++++++++++++++++++++------
1 file changed, 48 insertions(+), 13 deletions(-)
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index b0bcb8b2e4..de2fbb82bf 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -175,6 +175,53 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
return TRUE;
}
+/*!
+ * \internal
+ * \brief Synthesize an executor event for a resource action timeout
+ *
+ * \param[in] action Resource action that timed out
+ * \param[in] target_rc Expected result of action that timed out
+ *
+ * Synthesize an executor event for a resource action timeout. (If the executor
+ * gets a timeout while waiting for a resource action to complete, that will be
+ * reported via the usual callback. This timeout means we didn't hear from the
+ * executor itself or the controller that relayed the action to the executor.)
+ *
+ * \return Newly created executor event for result of \p action
+ * \note The caller is responsible for freeing the return value using
+ * lrmd_free_event().
+ */
+static lrmd_event_data_t *
+synthesize_timeout_event(crm_action_t *action, int target_rc)
+{
+ lrmd_event_data_t *op = NULL;
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
+ const char *reason = NULL;
+ char *dynamic_reason = NULL;
+
+ if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
+ reason = "Local executor did not return result in time";
+ } else {
+ const char *router_node = NULL;
+
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
+ if (router_node == NULL) {
+ router_node = target;
+ }
+ dynamic_reason = crm_strdup_printf("Controller on %s did not return "
+ "result in time", router_node);
+ reason = dynamic_reason;
+ }
+
+ op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
+ PCMK_OCF_UNKNOWN_ERROR, reason);
+ op->call_id = -1;
+ op->user_data = pcmk__transition_key(transition_graph->id, action->id,
+ target_rc, te_uuid);
+ free(dynamic_reason);
+ return op;
+}
+
void
controld_record_action_timeout(crm_action_t *action)
{
@@ -231,19 +278,7 @@ controld_record_action_timeout(crm_action_t *action)
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
- /* If the executor gets a timeout while waiting for the action to complete,
- * that will be reported via the usual callback. This timeout means that we
- * didn't hear from the executor or the controller that relayed the action
- * to the executor.
- */
- op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
- PCMK_OCF_UNKNOWN_ERROR,
- "Cluster communication timeout "
- "(no response from executor)");
- op->call_id = -1;
- op->user_data = pcmk__transition_key(transition_graph->id, action->id,
- target_rc, te_uuid);
-
+ op = synthesize_timeout_event(action, target_rc);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
lrmd_free_event(op);
--
2.27.0
From be620d206faefab967d4c8567d6554d10c9e72ba Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 22 Dec 2021 16:35:06 -0600
Subject: [PATCH 11/12] Feature: fencing: improve exit reason for fencing
timeouts
Troubleshooting timeouts is one of the more difficult aspects of cluster
maintenance. We want to give as much of a hint as possible, but for fencing in
particular it is difficult because an operation might involve multiple retries
of multiple devices.
Barring another major project to track exactly which devices, retries, etc.,
were used in a given operation, these changes in wording are probably the best
we can do.
---
daemons/fenced/fenced_remote.c | 8 +++++---
lib/fencing/st_client.c | 2 +-
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 1e237150c5..6eebb7381e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -715,8 +715,10 @@ remote_op_timeout(gpointer userdata)
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
} else {
- finalize_timed_out_op(userdata, "Fencing could not be completed "
- "within overall timeout");
+ finalize_timed_out_op(userdata, "Fencing did not complete within a "
+ "total timeout based on the "
+ "configured timeout and retries for "
+ "any devices attempted");
}
return G_SOURCE_REMOVE;
}
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index a0f3119f3b..718739b321 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -906,7 +906,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
if (msg == NULL) {
// Fencer didn't reply in time
pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
- "Timeout waiting for reply from fencer");
+ "Fencer accepted request but did not reply in time");
CRM_LOG_ASSERT(call_id > 0);
} else {
--
2.27.0
From 0fe8ede2f8e838e335fe42846bdf147111ce9955 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 22 Dec 2021 17:09:09 -0600
Subject: [PATCH 12/12] Feature: libcrmservice: improve exit reason for
timeouts
The services library doesn't have enough information about an action to say
(for example) what configuration parameters might be relevant, but we can at
least distinguish what kind of agent timed out.
---
lib/services/services_linux.c | 12 +++++++++++-
lib/services/systemd.c | 2 +-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index f15eee860e..d6aafcfe46 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -677,9 +677,19 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
parse_exit_reason_from_stderr(op);
} else if (mainloop_child_timeout(p)) {
+ const char *reason = NULL;
+
+ if (op->rsc != NULL) {
+ reason = "Resource agent did not complete in time";
+ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
+ pcmk__str_none)) {
+ reason = "Fence agent did not complete in time";
+ } else {
+ reason = "Process did not complete in time";
+ }
crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
- "Process did not exit within specified timeout");
+ reason);
} else if (op->cancel) {
/* If an in-flight recurring operation was killed because it was
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index 27a3b376db..d87b287424 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -995,7 +995,7 @@ systemd_timeout_callback(gpointer p)
crm_info("%s action for systemd unit %s named '%s' timed out",
op->action, op->agent, op->rsc);
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
- "Systemd action did not complete within specified timeout");
+ "Systemd unit action did not complete in time");
services__finalize_async_op(op);
return FALSE;
}
--
2.27.0

View File

@ -1,29 +0,0 @@
From e8bf0161b872267f1bb7143a9866fdc15ec218f2 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Tue, 18 Jan 2022 16:35:24 +0100
Subject: [PATCH] Fix: corosync: Repeat corosync_cfg_trackstart
corosync_cfg_trackstart can fail with CS_ERR_TRY_AGAIN, so (as is
already done for corosync_cfg_local_get, ...) handle that failure by
retrying the call with the cs_repeat macro.
---
daemons/pacemakerd/pcmkd_corosync.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
index 7990bc43c5..cd7a40321d 100644
--- a/daemons/pacemakerd/pcmkd_corosync.c
+++ b/daemons/pacemakerd/pcmkd_corosync.c
@@ -186,7 +186,8 @@ cluster_connect_cfg(void)
crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
#ifdef HAVE_COROSYNC_CFG_TRACKSTART
- rc = corosync_cfg_trackstart(cfg_handle, 0);
+ retries = 0;
+ cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
if (rc != CS_OK) {
crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
cs_strerror(rc), rc);
--
2.27.0

View File

@ -1,41 +0,0 @@
From e316840a7e1d2a72e3089ee194334244c959905a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 19 Jan 2022 09:53:53 -0600
Subject: [PATCH] Fix: pacemakerd: tweak systemd unit respawn settings
If pacemaker exits immediately after starting, wait 1 second before trying to
respawn, since the default of 100ms is a bit aggressive for a Pacemaker
cluster.
Also, allow 5 attempts in 25 seconds before giving up.
---
daemons/pacemakerd/pacemaker.service.in | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
index 0363a2259c..3fd53d9ffb 100644
--- a/daemons/pacemakerd/pacemaker.service.in
+++ b/daemons/pacemakerd/pacemaker.service.in
@@ -31,6 +31,9 @@ After=rsyslog.service
After=corosync.service
Requires=corosync.service
+# If Pacemaker respawns repeatedly, give up after this many tries in this time
+StartLimitBurst=5
+StartLimitIntervalSec=25s
[Install]
WantedBy=multi-user.target
@@ -57,6 +60,9 @@ TasksMax=infinity
# resource. Sending -KILL will just get the node fenced
SendSIGKILL=no
+# Systemd's default of respawning a failed service after 100ms is too aggressive
+RestartSec=1s
+
# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
# admin wants to stop the cluster while pacemakerd is not running, it
# might be a good idea to enable the ExecStopPost directive below.
--
2.27.0

File diff suppressed because it is too large Load Diff

View File

@ -1,82 +0,0 @@
From 8034a203bbff0aa3b53f2946dc58e409bd7246c9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 20 Jan 2022 15:03:31 -0600
Subject: [PATCH] Fix: scheduler: avoid memory leak when displaying clones
Previously, pe__clone_default() unconditionally created a hash table for
stopped instances, but didn't free it in every code path.
Now, only create the table when we have something to put in it and might
actually use it, and ensure it always gets freed.
---
lib/pengine/clone.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
index 742e2920b0..920a04c32c 100644
--- a/lib/pengine/clone.c
+++ b/lib/pengine/clone.c
@@ -761,7 +761,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
- GHashTable *stopped = pcmk__strkey_table(free, free);
+ GHashTable *stopped = NULL;
char *list_text = NULL;
size_t list_text_len = 0;
@@ -818,7 +818,11 @@ pe__clone_default(pcmk__output_t *out, va_list args)
} else if (partially_active == FALSE) {
// List stopped instances when requested (except orphans)
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
+ && !pcmk_is_set(show_opts, pcmk_show_clone_detail)
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
+ if (stopped == NULL) {
+ stopped = pcmk__strkey_table(free, free);
+ }
g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
}
@@ -873,7 +877,6 @@ pe__clone_default(pcmk__output_t *out, va_list args)
}
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
- g_hash_table_destroy(stopped);
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return pcmk_rc_ok;
}
@@ -948,8 +951,10 @@ pe__clone_default(pcmk__output_t *out, va_list args)
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
/* Custom stopped table for non-unique clones */
- g_hash_table_destroy(stopped);
- stopped = pcmk__strkey_table(free, free);
+ if (stopped != NULL) {
+ g_hash_table_destroy(stopped);
+ stopped = NULL;
+ }
if (list == NULL) {
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
@@ -972,6 +977,9 @@ pe__clone_default(pcmk__output_t *out, va_list args)
state = "Stopped (disabled)";
}
+ if (stopped == NULL) {
+ stopped = pcmk__strkey_table(free, free);
+ }
if (probe_op != NULL) {
int rc;
@@ -987,7 +995,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
g_list_free(list);
}
- if (g_hash_table_size(stopped) > 0) {
+ if (stopped != NULL) {
GList *list = sorted_hash_table_values(stopped);
clone_header(out, &rc, rsc, clone_data);
--
2.27.0

View File

@ -1,30 +0,0 @@
From 16928cfc69136bc56b1574bee9966e0d5de73abd Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 26 Jan 2022 09:15:43 -0600
Subject: [PATCH] Fix: controller: correctly match "node down" events
regression introduced in 2.1.2 by 03ce7376e
The symptom that led to this was that removing a remote node connection
resource would lead to the remote node getting fenced when the connection stop
was not recognized as an expected down event.
---
daemons/controld/controld_te_events.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
index 36fd832ba0..1fd7129922 100644
--- a/daemons/controld/controld_te_events.c
+++ b/daemons/controld/controld_te_events.c
@@ -304,7 +304,7 @@ match_down_event(const char *target)
gIter2 = gIter2->next) {
match = (crm_action_t*)gIter2->data;
- if (pcmk_is_set(match->flags, pcmk__graph_action_confirmed)) {
+ if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
xpath_ret = xpath_search(match->xml, xpath);
if (numXpathResults(xpath_ret) < 1) {
match = NULL;
--
2.27.0

View File

@ -1,806 +0,0 @@
From 767b5552ab49850204692c2c990dfb41d37589f3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Mar 2022 18:11:52 -0500
Subject: [PATCH 1/9] Refactor: libpacemaker: drop unnecessary argument from
"rsc-action" message
9875cab129 moved the setting of the "moving" variable from LogActions() to a
new "rsc-action" message, but continued to pass the variable unnecessarily
Also simplify how it's set
---
lib/pacemaker/pcmk_output.c | 10 ++++------
lib/pacemaker/pcmk_sched_native.c | 4 +---
2 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
index d864c8bd2..56963a93f 100644
--- a/lib/pacemaker/pcmk_output.c
+++ b/lib/pacemaker/pcmk_output.c
@@ -873,19 +873,18 @@ digests_xml(pcmk__output_t *out, va_list args)
} \
} while(0)
-PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *",
- "gboolean")
+PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *")
static int
rsc_action_default(pcmk__output_t *out, va_list args)
{
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
pe_node_t *current = va_arg(args, pe_node_t *);
pe_node_t *next = va_arg(args, pe_node_t *);
- gboolean moving = va_arg(args, gboolean);
GList *possible_matches = NULL;
char *key = NULL;
int rc = pcmk_rc_no_output;
+ bool moving = false;
pe_node_t *start_node = NULL;
pe_action_t *start = NULL;
@@ -901,9 +900,8 @@ rsc_action_default(pcmk__output_t *out, va_list args)
return rc;
}
- if (current != NULL && next != NULL && !pcmk__str_eq(current->details->id, next->details->id, pcmk__str_casei)) {
- moving = TRUE;
- }
+ moving = (current != NULL) && (next != NULL)
+ && (current->details != next->details);
possible_matches = pe__resource_actions(rsc, next, RSC_START, FALSE);
if (possible_matches) {
diff --git a/lib/pacemaker/pcmk_sched_resource.c b/lib/pacemaker/pcmk_sched_resource.c
index a3d646775..41631da3d 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -2037,8 +2037,6 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set)
pe_node_t *next = NULL;
pe_node_t *current = NULL;
- gboolean moving = FALSE;
-
if(rsc->variant == pe_container) {
pcmk__bundle_log_actions(rsc, data_set);
return;
@@ -2066,7 +2064,7 @@ LogActions(pe_resource_t * rsc, pe_working_set_t * data_set)
return;
}
- out->message(out, "rsc-action", rsc, current, next, moving);
+ out->message(out, "rsc-action", rsc, current, next);
}
gboolean
--
2.27.0
From 870fb19715618c4ceab9ed4ae13a99658440b662 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 22 Mar 2022 15:22:23 -0500
Subject: [PATCH 2/9] Refactor: scheduler: functionize scheduling restart
actions
native_create_actions() is already overlarge, and more needs to be added to it
---
lib/pacemaker/pcmk_sched_native.c | 85 ++++++++++++++++++++-----------
1 file changed, 54 insertions(+), 31 deletions(-)
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 808e97540..b8a1c1e1a 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1185,6 +1185,58 @@ handle_migration_actions(pe_resource_t * rsc, pe_node_t *current, pe_node_t *cho
}
}
+/*!
+ * \internal
+ * \brief Schedule actions to bring resource down and back to current role
+ *
+ * \param[in] rsc Resource to restart
+ * \param[in] current Node that resource should be brought down on
+ * \param[in] chosen Node that resource should be brought up on
+ * \param[in] need_stop Whether the resource must be stopped
+ * \param[in] need_promote Whether the resource must be promoted
+ *
+ * \note Each loop stops early if a rsc_action_matrix function returns false
+ */
+static void
+schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
+ pe_node_t *chosen, bool need_stop, bool need_promote)
+{
+ enum rsc_role_e role = rsc->role;
+ enum rsc_role_e next_role;
+
+ // Bring resource down to a stop on its current node
+ while (role != RSC_ROLE_STOPPED) {
+ next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
+ pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
+ (need_stop? "required" : "optional"), rsc->id,
+ role2text(role), role2text(next_role));
+ if (!rsc_action_matrix[role][next_role](rsc, current, !need_stop,
+ rsc->cluster)) {
+ break;
+ }
+ role = next_role;
+ }
+
+ // Bring resource back up to its current role (rsc->role) on the chosen node
+ while ((rsc->role <= rsc->next_role) && (role != rsc->role)
+ && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
+ bool required = need_stop;
+
+ next_role = rsc_state_matrix[role][rsc->role];
+ if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
+ required = true;
+ }
+ pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
+ (required? "required" : "optional"), rsc->id,
+ role2text(role), role2text(next_role));
+ if (!rsc_action_matrix[role][next_role](rsc, chosen, !required,
+ rsc->cluster)) {
+ break;
+ }
+ role = next_role;
+ }
+}
+
void
native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
{
@@ -1332,39 +1384,10 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
/* Create any additional actions required when bringing resource down and
* back up to same level.
*/
- role = rsc->role;
- while (role != RSC_ROLE_STOPPED) {
- next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
- pe_rsc_trace(rsc, "Creating %s action to take %s down from %s to %s",
- (need_stop? "required" : "optional"), rsc->id,
- role2text(role), role2text(next_role));
- if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) {
- break;
- }
- role = next_role;
- }
-
-
- while ((rsc->role <= rsc->next_role) && (role != rsc->role)
- && !pcmk_is_set(rsc->flags, pe_rsc_block)) {
- bool required = need_stop;
-
- next_role = rsc_state_matrix[role][rsc->role];
- if ((next_role == RSC_ROLE_PROMOTED) && need_promote) {
- required = true;
- }
- pe_rsc_trace(rsc, "Creating %s action to take %s up from %s to %s",
- (required? "required" : "optional"), rsc->id,
- role2text(role), role2text(next_role));
- if (rsc_action_matrix[role][next_role](rsc, chosen, !required,
- data_set) == FALSE) {
- break;
- }
- role = next_role;
- }
- role = rsc->role;
+ schedule_restart_actions(rsc, current, chosen, need_stop, need_promote);
/* Required steps from this role to the next */
+ role = rsc->role;
while (role != rsc->next_role) {
next_role = rsc_state_matrix[role][rsc->next_role];
pe_rsc_trace(rsc, "Creating action to take %s from %s to %s (ending at %s)",
--
2.27.0
From 736d4d8f5e432acf12e577d137e9165904c71b3b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 28 Mar 2022 17:42:26 -0500
Subject: [PATCH 3/9] Log: scheduler: improve trace messages when creating
actions
---
lib/pacemaker/pcmk_sched_native.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index b8a1c1e1a..8b651ebd2 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1997,7 +1997,6 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
GList *gIter = NULL;
CRM_ASSERT(rsc);
- pe_rsc_trace(rsc, "%s", rsc->id);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *current = (pe_node_t *) gIter->data;
@@ -2005,16 +2004,23 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
if (rsc->partial_migration_target) {
if (rsc->partial_migration_target->details == current->details) {
- pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname,
- next->details->uname, rsc->id);
+ pe_rsc_trace(rsc,
+ "Skipping stop of %s on %s "
+ "because migration to %s in progress",
+ rsc->id, current->details->uname,
+ next->details->uname);
continue;
} else {
- pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id);
+ pe_rsc_trace(rsc,
+ "Forcing stop of %s on %s "
+ "because migration target changed",
+ rsc->id, current->details->uname);
optional = FALSE;
}
}
- pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname);
+ pe_rsc_trace(rsc, "Scheduling stop of %s on %s",
+ rsc->id, current->details->uname);
stop = stop_action(rsc, current, optional);
if(rsc->allocated_to == NULL) {
@@ -2048,7 +2054,11 @@ StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_se
pe_action_t *start = NULL;
CRM_ASSERT(rsc);
- pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0);
+
+ pe_rsc_trace(rsc, "Scheduling %s start of %s on %s (weight=%d)",
+ (optional? "optional" : "required"), rsc->id,
+ ((next == NULL)? "N/A" : next->details->uname),
+ ((next == NULL)? 0 : next->weight));
start = start_action(rsc, next, TRUE);
pcmk__order_vs_unfence(rsc, next, start, pe_order_implies_then, data_set);
--
2.27.0
From 6f987234d5246ed50f4fe2db90e5edb6a23e877d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 16:42:06 -0600
Subject: [PATCH 4/9] Log: scheduler: log a warning if invalid value is given
for multiple-active
---
lib/pengine/complex.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index e82af2aae..f2caef831 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -694,7 +694,12 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc,
(*rsc)->recovery_type = recovery_block;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
- } else {
+ } else { // "stop_start"
+ if (!pcmk__str_eq(value, "stop_start",
+ pcmk__str_casei|pcmk__str_null_matches)) {
+ pe_warn("%s is not a valid value for " XML_RSC_ATTR_MULTIPLE
+ ", using default of \"stop_start\"", value);
+ }
(*rsc)->recovery_type = recovery_stop_start;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: stop/start");
}
--
2.27.0
From 50456c3e229a6021ca0ba7346af41cd234abcc16 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 16:49:31 -0600
Subject: [PATCH 5/9] API: libpe_status: add recovery_stop_unexpected to enum
rsc_recovery_type
The behavior is not implemented as of this commit
---
include/crm/pengine/common.h | 14 ++++++++++++--
lib/pengine/complex.c | 5 +++++
lib/pengine/native.c | 7 +++++--
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h
index efe89a171..9b9f38f3b 100644
--- a/include/crm/pengine/common.h
+++ b/include/crm/pengine/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -78,7 +78,8 @@ enum action_tasks {
enum rsc_recovery_type {
recovery_stop_start,
recovery_stop_only,
- recovery_block
+ recovery_block,
+ recovery_stop_unexpected,
};
enum rsc_start_requirement {
@@ -143,6 +144,13 @@ const char *fail2text(enum action_fail_response fail);
const char *pe_pref(GHashTable * options, const char *name);
void calculate_active_ops(GList * sorted_op_list, int *start_index, int *stop_index);
+/*!
+ * \brief Get readable description of a recovery type
+ *
+ * \param[in] type Recovery type
+ *
+ * \return Static string describing \p type
+ */
static inline const char *
recovery2text(enum rsc_recovery_type type)
{
@@ -153,6 +161,8 @@ recovery2text(enum rsc_recovery_type type)
return "attempting recovery";
case recovery_block:
return "waiting for an administrator";
+ case recovery_stop_unexpected:
+ return "stopping unexpected instances";
}
return "Unknown";
}
diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c
index f2caef831..fc9028e81 100644
--- a/lib/pengine/complex.c
+++ b/lib/pengine/complex.c
@@ -694,6 +694,11 @@ common_unpack(xmlNode * xml_obj, pe_resource_t ** rsc,
(*rsc)->recovery_type = recovery_block;
pe_rsc_trace((*rsc), "\tMultiple running resource recovery: block");
+ } else if (pcmk__str_eq(value, "stop_unexpected", pcmk__str_casei)) {
+ (*rsc)->recovery_type = recovery_stop_unexpected;
+ pe_rsc_trace((*rsc), "\tMultiple running resource recovery: "
+ "stop unexpected instances");
+
} else { // "stop_start"
if (!pcmk__str_eq(value, "stop_start",
pcmk__str_casei|pcmk__str_null_matches)) {
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index e16e54bae..fa7dc8960 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -149,8 +149,6 @@ native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * dat
}
}
break;
- case recovery_stop_start:
- break;
case recovery_block:
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_block);
@@ -171,6 +169,11 @@ native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * dat
}
}
break;
+ default: // recovery_stop_start, recovery_stop_unexpected
+ /* The scheduler will do the right thing because the relevant
+ * variables and flags are set when unpacking the history.
+ */
+ break;
}
crm_debug("%s is active on multiple nodes including %s: %s",
rsc->id, node->details->uname,
--
2.27.0
From 5e994f0633b27e7a53701e0954466739c8f1acf7 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 30 Mar 2022 16:26:19 -0500
Subject: [PATCH 6/9] API: libpe_status: add pe_rsc_stop_unexpected flag
---
include/crm/pengine/pe_types.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index e3ecaa823..7d5394bff 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -277,6 +277,7 @@ struct pe_node_s {
# define pe_rsc_starting 0x00100000ULL
# define pe_rsc_stopping 0x00200000ULL
+# define pe_rsc_stop_unexpected 0x00400000ULL
# define pe_rsc_allow_migrate 0x00800000ULL
# define pe_rsc_failure_ignored 0x01000000ULL
--
2.27.0
From c1acf05be853d99c17761759b8c961f2ec4a55c2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 31 Mar 2022 09:56:34 -0500
Subject: [PATCH 7/9] API: libpe_status: add pe_rsc_restarting flag
This is used to indicate that any actions currently being scheduled are part of
the resource's restart actions (i.e. we are in schedule_restart_actions()).
---
include/crm/pengine/pe_types.h | 1 +
lib/pacemaker/pcmk_sched_native.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h
index 7d5394bff..77d28e900 100644
--- a/include/crm/pengine/pe_types.h
+++ b/include/crm/pengine/pe_types.h
@@ -265,6 +265,7 @@ struct pe_node_s {
# define pe_rsc_provisional 0x00000100ULL
# define pe_rsc_allocating 0x00000200ULL
# define pe_rsc_merging 0x00000400ULL
+# define pe_rsc_restarting 0x00000800ULL
# define pe_rsc_stop 0x00001000ULL
# define pe_rsc_reload 0x00002000ULL
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 8b651ebd2..8002938b5 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1204,6 +1204,8 @@ schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
enum rsc_role_e role = rsc->role;
enum rsc_role_e next_role;
+ pe__set_resource_flags(rsc, pe_rsc_restarting);
+
// Bring resource down to a stop on its current node
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
@@ -1235,6 +1237,8 @@ schedule_restart_actions(pe_resource_t *rsc, pe_node_t *current,
}
role = next_role;
}
+
+ pe__clear_resource_flags(rsc, pe_rsc_restarting);
}
void
--
2.27.0
From 871e2201d92520039df45062afc9120fd1fb0f30 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 1 Mar 2022 17:46:39 -0600
Subject: [PATCH 8/9] Refactor: scheduler: add expected node to primitive
variant data
Nothing uses it yet
---
include/crm/pengine/internal.h | 4 ++++
lib/pengine/native.c | 38 ++++++++++++++++++++++++++++++++++
lib/pengine/variant.h | 8 +++++--
3 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
index f949684b4..f69e6bcce 100644
--- a/include/crm/pengine/internal.h
+++ b/include/crm/pengine/internal.h
@@ -579,4 +579,8 @@ xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
const char *pe__clone_child_id(pe_resource_t *rsc);
+void pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
+ int execution_status, int exit_status,
+ int expected_exit_status);
+
#endif
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index fa7dc8960..591d1c6f5 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1376,3 +1376,41 @@ pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_paren
return TRUE;
}
+
+/*!
+ * \internal
+ * \brief Set a resource's expected node if appropriate for a history result
+ *
+ * \param[in] rsc Resource to set expected node for
+ * \param[in] node Node to set as expected node
+ * \param[in] execution_status History entry's execution status
+ * \param[in] exit_status History entry's actual exit status
+ * \param[in] expected_exit_status History entry's expected exit status
+ */
+void
+pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
+ int execution_status, int exit_status,
+ int expected_exit_status)
+{
+ native_variant_data_t *native_data = NULL;
+
+ get_native_variant_data(native_data, rsc);
+
+ if ((rsc->recovery_type == recovery_stop_unexpected)
+ && (rsc->role > RSC_ROLE_STOPPED)
+ && (execution_status == PCMK_EXEC_DONE)
+ && (exit_status == expected_exit_status)) {
+ // Resource is active and was expected on this node
+ pe_rsc_trace(rsc, "Found expected node %s for %s",
+ node->details->uname, rsc->id);
+ native_data->expected_node = node;
+ pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
+
+ } else if ((native_data->expected_node != NULL)
+ && (native_data->expected_node->details == node->details)) {
+ // Resource is not cleanly active here
+ pe_rsc_trace(rsc, "Clearing expected node for %s", rsc->id);
+ native_data->expected_node = NULL;
+ pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
+ }
+}
diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h
index cabfbe81f..d8fefa9d6 100644
--- a/lib/pengine/variant.h
+++ b/lib/pengine/variant.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -139,7 +139,11 @@ typedef struct group_variant_data_s {
# elif VARIANT_NATIVE
typedef struct native_variant_data_s {
- int dummy;
+ /* If the resource is multiply active, and has multiple-active set to
+ * stop_unexpected, this will be set to the node where the resource was
+ * found active by an operation with an expected result.
+ */
+ pe_node_t *expected_node;
} native_variant_data_t;
# define get_native_variant_data(data, rsc) \
--
2.27.0
From 0e4e17e972f1c3663389f18d8f8c527bd819b3c5 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 7 Apr 2022 10:20:00 -0500
Subject: [PATCH 9/9] Feature: scheduler: implement
multiple-active=stop_unexpected
The default multiple-active policy of restarting the resource on all nodes
requires no special handling, because at least one of the locations will have
an unexpected rc, causing the resource to be marked as failed and restarted,
and StopRsc() creates stops on all nodes running the resource.
The new stop_unexpected behavior relies on most of the same handling, but
the action creation functions need to skip the node where the resource had the
expected result. For that, we set the new rsc->expected_node when unpacking a
successful result, to be checked by those functions.
Note that this still schedules a start for the resource, which is a pseudo-op
for the resource itself, but (properly) causes any dependent resources to be
restarted.
Fixes T23
---
lib/pacemaker/pcmk_output.c | 10 ++++
lib/pacemaker/pcmk_sched_native.c | 94 ++++++++++++++++++++++++++++++-
lib/pengine/unpack.c | 1 +
3 files changed, 103 insertions(+), 2 deletions(-)
diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
index 56963a93f..9a522a3e5 100644
--- a/lib/pacemaker/pcmk_output.c
+++ b/lib/pacemaker/pcmk_output.c
@@ -918,6 +918,16 @@ rsc_action_default(pcmk__output_t *out, va_list args)
if (possible_matches) {
stop = possible_matches->data;
g_list_free(possible_matches);
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_stop_unexpected)) {
+ /* The resource is multiply active with multiple-active set to
+ * stop_unexpected, and not stopping on its current node, but it should
+ * be stopping elsewhere.
+ */
+ possible_matches = pe__resource_actions(rsc, NULL, RSC_STOP, FALSE);
+ if (possible_matches != NULL) {
+ stop = possible_matches->data;
+ g_list_free(possible_matches);
+ }
}
possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, FALSE);
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index 8002938b5..c0224849f 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1259,7 +1259,10 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
- CRM_ASSERT(rsc);
+ native_variant_data_t *native_data = NULL;
+
+ get_native_variant_data(native_data, rsc);
+
chosen = rsc->allocated_to;
next_role = rsc->next_role;
if (next_role == RSC_ROLE_UNKNOWN) {
@@ -1323,6 +1326,7 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
"(will stop on both nodes)",
rsc->id, rsc->partial_migration_source->details->uname,
rsc->partial_migration_target->details->uname);
+ multiply_active = false;
} else {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
@@ -1345,6 +1349,11 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
allow_migrate = FALSE;
}
+ if (!multiply_active) {
+ native_data->expected_node = NULL;
+ pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
+ }
+
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "Creating start action for %s to represent already pending start",
rsc->id);
@@ -1995,6 +2004,32 @@ native_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
out->message(out, "rsc-action", rsc, current, next);
}
+/*!
+ * \internal
+ * \brief Check whether a node is a multiply active resource's expected node
+ *
+ * \param[in] rsc Resource to check
+ * \param[in] node Node to check
+ *
+ * \return true if \p rsc is multiply active with multiple-active set to
+ * stop_unexpected, and \p node is the node where it will remain active
+ * \note This assumes that the resource's next role cannot be changed to stopped
+ * after this is called, which should be reasonable if status has already
+ * been unpacked and resources have been assigned to nodes.
+ */
+static bool
+is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
+{
+ native_variant_data_t *native_data = NULL;
+
+ get_native_variant_data(native_data, rsc);
+ return pcmk_all_flags_set(rsc->flags,
+ pe_rsc_stop_unexpected|pe_rsc_restarting)
+ && (rsc->next_role > RSC_ROLE_STOPPED)
+ && (native_data->expected_node != NULL) && (node != NULL)
+ && (native_data->expected_node->details == node->details);
+}
+
gboolean
StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set_t * data_set)
{
@@ -2006,6 +2041,18 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
pe_node_t *current = (pe_node_t *) gIter->data;
pe_action_t *stop;
+ if (is_expected_node(rsc, current)) {
+ /* We are scheduling restart actions for a multiply active resource
+ * with multiple-active=stop_unexpected, and this is where it should
+ * not be stopped.
+ */
+ pe_rsc_trace(rsc,
+ "Skipping stop of multiply active resource %s "
+ "on expected node %s",
+ rsc->id, current->details->uname);
+ continue;
+ }
+
if (rsc->partial_migration_target) {
if (rsc->partial_migration_target->details == current->details) {
pe_rsc_trace(rsc,
@@ -2029,6 +2076,17 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
if(rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", TRUE);
+ } else if (pcmk_is_set(rsc->flags, pe_rsc_restarting)) {
+ native_variant_data_t *native_data = NULL;
+
+ get_native_variant_data(native_data, rsc);
+ if (native_data->expected_node != NULL) {
+ /* We are stopping a multiply active resource on a node that is
+ * not its expected node, and we are still scheduling restart
+ * actions, so the stop is for being multiply active.
+ */
+ pe_action_set_reason(stop, "being multiply active", TRUE);
+ }
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
@@ -2071,6 +2129,16 @@ StartRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_se
pe__clear_action_flags(start, pe_action_optional);
}
+ if (is_expected_node(rsc, next)) {
+ /* This could be a problem if the start becomes necessary for other
+ * reasons later.
+ */
+ pe_rsc_trace(rsc,
+ "Start of multiply active resouce %s "
+ "on expected node %s will be a pseudo-action",
+ rsc->id, next->details->uname);
+ pe__set_action_flags(start, pe_action_pseudo);
+ }
return TRUE;
}
@@ -2084,6 +2152,7 @@ PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_
CRM_ASSERT(rsc);
CRM_CHECK(next != NULL, return FALSE);
+
pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname);
action_list = pe__resource_actions(rsc, next, RSC_START, TRUE);
@@ -2098,7 +2167,19 @@ PromoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_
g_list_free(action_list);
if (runnable) {
- promote_action(rsc, next, optional);
+ pe_action_t *promote = promote_action(rsc, next, optional);
+
+ if (is_expected_node(rsc, next)) {
+ /* This could be a problem if the promote becomes necessary for
+ * other reasons later.
+ */
+ pe_rsc_trace(rsc,
+ "Promotion of multiply active resouce %s "
+ "on expected node %s will be a pseudo-action",
+ rsc->id, next->details->uname);
+ pe__set_action_flags(promote, pe_action_pseudo);
+ }
+
return TRUE;
}
@@ -2122,6 +2203,15 @@ DemoteRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_s
GList *gIter = NULL;
CRM_ASSERT(rsc);
+
+ if (is_expected_node(rsc, next)) {
+ pe_rsc_trace(rsc,
+ "Skipping demote of multiply active resource %s "
+ "on expected node %s",
+ rsc->id, next->details->uname);
+ return TRUE;
+ }
+
pe_rsc_trace(rsc, "%s", rsc->id);
/* CRM_CHECK(rsc->next_role == RSC_ROLE_UNPROMOTED, return FALSE); */
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index d218f523f..edaa9de48 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -3974,6 +3974,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
}
done:
+ pe__update_expected_node(rsc, node, status, rc, target_rc);
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
rsc->id, task, role2text(rsc->role),
role2text(rsc->next_role));
--
2.27.0

View File

@ -1,495 +0,0 @@
From 8a0a16c8ed72c74d656664694ebe36b76ff22498 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 30 Mar 2022 17:14:33 -0500
Subject: [PATCH] Test: cts-scheduler: add test for
multiple-active=stop_unexpected
---
cts/cts-scheduler.in | 1 +
cts/scheduler/dot/stop-unexpected.dot | 40 ++++
cts/scheduler/exp/stop-unexpected.exp | 201 ++++++++++++++++++
cts/scheduler/scores/stop-unexpected.scores | 17 ++
cts/scheduler/summary/stop-unexpected.summary | 41 ++++
cts/scheduler/xml/stop-unexpected.xml | 131 ++++++++++++
6 files changed, 431 insertions(+)
create mode 100644 cts/scheduler/dot/stop-unexpected.dot
create mode 100644 cts/scheduler/exp/stop-unexpected.exp
create mode 100644 cts/scheduler/scores/stop-unexpected.scores
create mode 100644 cts/scheduler/summary/stop-unexpected.summary
create mode 100644 cts/scheduler/xml/stop-unexpected.xml
diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index 3a8aeaca5..8c04687da 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -273,6 +273,7 @@ TESTS = [
[ "rec-rsc-6", "Resource Recover - multiple - restart" ],
[ "rec-rsc-7", "Resource Recover - multiple - stop" ],
[ "rec-rsc-8", "Resource Recover - multiple - block" ],
+ [ "stop-unexpected", "Resource Recover - multiple - stop unexpected" ],
[ "rec-rsc-9", "Resource Recover - group/group" ],
[ "monitor-recovery", "on-fail=block + resource recovery detected by recurring monitor" ],
[ "stop-failure-no-quorum", "Stop failure without quorum" ],
diff --git a/cts/scheduler/dot/stop-unexpected.dot b/cts/scheduler/dot/stop-unexpected.dot
new file mode 100644
index 000000000..0f67eec54
--- /dev/null
+++ b/cts/scheduler/dot/stop-unexpected.dot
@@ -0,0 +1,40 @@
+ digraph "g" {
+"dgroup_running_0" [ style=bold color="green" fontcolor="orange"]
+"dgroup_start_0" -> "dgroup_running_0" [ style = bold]
+"dgroup_start_0" -> "dummy2_start_0 node2" [ style = bold]
+"dgroup_start_0" -> "dummy3_start_0 node2" [ style = bold]
+"dgroup_start_0" -> "dummy_start_0 node2" [ style = bold]
+"dgroup_start_0" [ style=bold color="green" fontcolor="orange"]
+"dgroup_stop_0" -> "dgroup_stopped_0" [ style = bold]
+"dgroup_stop_0" -> "dummy2_stop_0 node2" [ style = bold]
+"dgroup_stop_0" -> "dummy3_stop_0 node2" [ style = bold]
+"dgroup_stop_0" -> "dummy_stop_0 node3" [ style = bold]
+"dgroup_stop_0" [ style=bold color="green" fontcolor="orange"]
+"dgroup_stopped_0" -> "dgroup_start_0" [ style = bold]
+"dgroup_stopped_0" [ style=bold color="green" fontcolor="orange"]
+"dummy2_monitor_10000 node2" [ style=bold color="green" fontcolor="black"]
+"dummy2_start_0 node2" -> "dgroup_running_0" [ style = bold]
+"dummy2_start_0 node2" -> "dummy2_monitor_10000 node2" [ style = bold]
+"dummy2_start_0 node2" -> "dummy3_start_0 node2" [ style = bold]
+"dummy2_start_0 node2" [ style=bold color="green" fontcolor="black"]
+"dummy2_stop_0 node2" -> "dgroup_stopped_0" [ style = bold]
+"dummy2_stop_0 node2" -> "dummy2_start_0 node2" [ style = bold]
+"dummy2_stop_0 node2" -> "dummy_stop_0 node3" [ style = bold]
+"dummy2_stop_0 node2" [ style=bold color="green" fontcolor="black"]
+"dummy3_monitor_10000 node2" [ style=bold color="green" fontcolor="black"]
+"dummy3_start_0 node2" -> "dgroup_running_0" [ style = bold]
+"dummy3_start_0 node2" -> "dummy3_monitor_10000 node2" [ style = bold]
+"dummy3_start_0 node2" [ style=bold color="green" fontcolor="black"]
+"dummy3_stop_0 node2" -> "dgroup_stopped_0" [ style = bold]
+"dummy3_stop_0 node2" -> "dummy2_stop_0 node2" [ style = bold]
+"dummy3_stop_0 node2" -> "dummy3_start_0 node2" [ style = bold]
+"dummy3_stop_0 node2" [ style=bold color="green" fontcolor="black"]
+"dummy_monitor_10000 node2" [ style=bold color="green" fontcolor="black"]
+"dummy_start_0 node2" -> "dgroup_running_0" [ style = bold]
+"dummy_start_0 node2" -> "dummy2_start_0 node2" [ style = bold]
+"dummy_start_0 node2" -> "dummy_monitor_10000 node2" [ style = bold]
+"dummy_start_0 node2" [ style=bold color="green" fontcolor="orange"]
+"dummy_stop_0 node3" -> "dgroup_stopped_0" [ style = bold]
+"dummy_stop_0 node3" -> "dummy_start_0 node2" [ style = bold]
+"dummy_stop_0 node3" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/cts/scheduler/exp/stop-unexpected.exp b/cts/scheduler/exp/stop-unexpected.exp
new file mode 100644
index 000000000..1f94532f7
--- /dev/null
+++ b/cts/scheduler/exp/stop-unexpected.exp
@@ -0,0 +1,201 @@
+<transition_graph cluster-delay="60s" stonith-timeout="90" failed-stop-offset="INFINITY" failed-start-offset="INFINITY" transition_id="0">
+ <synapse id="0">
+ <action_set>
+ <pseudo_event id="15" operation="stopped" operation_key="dgroup_stopped_0">
+ <attributes CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </pseudo_event>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="4" operation="stop" operation_key="dummy_stop_0" on_node="node3" on_node_uuid="node3"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="8" operation="stop" operation_key="dummy2_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="10" operation="stop" operation_key="dummy3_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="14" operation="stop" operation_key="dgroup_stop_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="1">
+ <action_set>
+ <pseudo_event id="14" operation="stop" operation_key="dgroup_stop_0">
+ <attributes CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </pseudo_event>
+ </action_set>
+ <inputs/>
+ </synapse>
+ <synapse id="2">
+ <action_set>
+ <pseudo_event id="13" operation="running" operation_key="dgroup_running_0">
+ <attributes CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </pseudo_event>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="7" operation="start" operation_key="dummy_start_0"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="9" operation="start" operation_key="dummy2_start_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="11" operation="start" operation_key="dummy3_start_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="12" operation="start" operation_key="dgroup_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="3">
+ <action_set>
+ <pseudo_event id="12" operation="start" operation_key="dgroup_start_0">
+ <attributes CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </pseudo_event>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="15" operation="stopped" operation_key="dgroup_stopped_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="4">
+ <action_set>
+ <pseudo_event id="7" operation="start" operation_key="dummy_start_0">
+ <attributes CRM_meta_name="start" CRM_meta_record_pending="false" CRM_meta_timeout="300000" />
+ </pseudo_event>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="4" operation="stop" operation_key="dummy_stop_0" on_node="node3" on_node_uuid="node3"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="12" operation="start" operation_key="dgroup_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="5">
+ <action_set>
+ <rsc_op id="4" operation="stop" operation_key="dummy_stop_0" on_node="node3" on_node_uuid="node3">
+ <primitive id="dummy" class="ocf" provider="heartbeat" type="DummyTimeout"/>
+ <attributes CRM_meta_name="stop" CRM_meta_on_node="node3" CRM_meta_on_node_uuid="node3" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="8" operation="stop" operation_key="dummy2_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="14" operation="stop" operation_key="dgroup_stop_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="6">
+ <action_set>
+ <rsc_op id="2" operation="monitor" operation_key="dummy_monitor_10000" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy" class="ocf" provider="heartbeat" type="DummyTimeout"/>
+ <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="7" operation="start" operation_key="dummy_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="7">
+ <action_set>
+ <rsc_op id="9" operation="start" operation_key="dummy2_start_0" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy2" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_name="start" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="7" operation="start" operation_key="dummy_start_0"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="8" operation="stop" operation_key="dummy2_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="12" operation="start" operation_key="dgroup_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="8">
+ <action_set>
+ <rsc_op id="8" operation="stop" operation_key="dummy2_stop_0" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy2" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_name="stop" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="10" operation="stop" operation_key="dummy3_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="14" operation="stop" operation_key="dgroup_stop_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="9">
+ <action_set>
+ <rsc_op id="3" operation="monitor" operation_key="dummy2_monitor_10000" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy2" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="9" operation="start" operation_key="dummy2_start_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="10">
+ <action_set>
+ <rsc_op id="11" operation="start" operation_key="dummy3_start_0" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy3" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_name="start" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="9" operation="start" operation_key="dummy2_start_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <rsc_op id="10" operation="stop" operation_key="dummy3_stop_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ <trigger>
+ <pseudo_event id="12" operation="start" operation_key="dgroup_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="11">
+ <action_set>
+ <rsc_op id="10" operation="stop" operation_key="dummy3_stop_0" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy3" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_name="stop" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="14" operation="stop" operation_key="dgroup_stop_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="12">
+ <action_set>
+ <rsc_op id="1" operation="monitor" operation_key="dummy3_monitor_10000" on_node="node2" on_node_uuid="node2">
+ <primitive id="dummy3" class="ocf" provider="heartbeat" type="Dummy"/>
+ <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_on_node="node2" CRM_meta_on_node_uuid="node2" CRM_meta_record_pending="false" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="11" operation="start" operation_key="dummy3_start_0" on_node="node2" on_node_uuid="node2"/>
+ </trigger>
+ </inputs>
+ </synapse>
+</transition_graph>
diff --git a/cts/scheduler/scores/stop-unexpected.scores b/cts/scheduler/scores/stop-unexpected.scores
new file mode 100644
index 000000000..68f98e837
--- /dev/null
+++ b/cts/scheduler/scores/stop-unexpected.scores
@@ -0,0 +1,17 @@
+
+pcmk__group_allocate: dgroup allocation score on node2: 0
+pcmk__group_allocate: dgroup allocation score on node3: 0
+pcmk__group_allocate: dummy allocation score on node2: 0
+pcmk__group_allocate: dummy allocation score on node3: 0
+pcmk__group_allocate: dummy2 allocation score on node2: 100
+pcmk__group_allocate: dummy2 allocation score on node3: 0
+pcmk__group_allocate: dummy3 allocation score on node2: 100
+pcmk__group_allocate: dummy3 allocation score on node3: 0
+pcmk__native_allocate: dummy allocation score on node2: 200
+pcmk__native_allocate: dummy allocation score on node3: 0
+pcmk__native_allocate: dummy2 allocation score on node2: 200
+pcmk__native_allocate: dummy2 allocation score on node3: -INFINITY
+pcmk__native_allocate: dummy3 allocation score on node2: 100
+pcmk__native_allocate: dummy3 allocation score on node3: -INFINITY
+pcmk__native_allocate: st-sbd allocation score on node2: 100
+pcmk__native_allocate: st-sbd allocation score on node3: 0
diff --git a/cts/scheduler/summary/stop-unexpected.summary b/cts/scheduler/summary/stop-unexpected.summary
new file mode 100644
index 000000000..7c7fc68b6
--- /dev/null
+++ b/cts/scheduler/summary/stop-unexpected.summary
@@ -0,0 +1,41 @@
+Current cluster status:
+ * Node List:
+ * Online: [ node2 node3 ]
+
+ * Full List of Resources:
+ * st-sbd (stonith:external/sbd): Started node2
+ * Resource Group: dgroup:
+ * dummy (ocf:heartbeat:DummyTimeout): FAILED [ node2 node3 ]
+ * dummy2 (ocf:heartbeat:Dummy): Started node2
+ * dummy3 (ocf:heartbeat:Dummy): Started node2
+
+Transition Summary:
+ * Recover dummy ( node2 ) due to being multiply active
+ * Restart dummy2 ( node2 ) due to required dummy start
+ * Restart dummy3 ( node2 ) due to required dummy2 start
+
+Executing Cluster Transition:
+ * Pseudo action: dgroup_stop_0
+ * Resource action: dummy3 stop on node2
+ * Resource action: dummy2 stop on node2
+ * Resource action: dummy stop on node3
+ * Pseudo action: dgroup_stopped_0
+ * Pseudo action: dgroup_start_0
+ * Pseudo action: dummy_start_0
+ * Resource action: dummy monitor=10000 on node2
+ * Resource action: dummy2 start on node2
+ * Resource action: dummy2 monitor=10000 on node2
+ * Resource action: dummy3 start on node2
+ * Resource action: dummy3 monitor=10000 on node2
+ * Pseudo action: dgroup_running_0
+
+Revised Cluster Status:
+ * Node List:
+ * Online: [ node2 node3 ]
+
+ * Full List of Resources:
+ * st-sbd (stonith:external/sbd): Started node2
+ * Resource Group: dgroup:
+ * dummy (ocf:heartbeat:DummyTimeout): Started node2
+ * dummy2 (ocf:heartbeat:Dummy): Started node2
+ * dummy3 (ocf:heartbeat:Dummy): Started node2
diff --git a/cts/scheduler/xml/stop-unexpected.xml b/cts/scheduler/xml/stop-unexpected.xml
new file mode 100644
index 000000000..6e61aeba3
--- /dev/null
+++ b/cts/scheduler/xml/stop-unexpected.xml
@@ -0,0 +1,131 @@
+<cib epoch="631" num_updates="25" admin_epoch="0" validate-with="pacemaker-3.0" crm_feature_set="3.0.8" have-quorum="1" cib-last-written="Thu Aug 20 11:44:27 2015" update-origin="node2" update-client="cibadmin" update-user="root" dc-uuid="node2">
+ <configuration>
+ <crm_config>
+ <cluster_property_set id="cib-bootstrap-options">
+ <nvpair name="dc-version" value="1.1.11-3ca8c3b" id="cib-bootstrap-options-dc-version"/>
+ <nvpair name="cluster-infrastructure" value="corosync" id="cib-bootstrap-options-cluster-infrastructure"/>
+ <nvpair name="node-action-limit" value="2" id="cib-bootstrap-options-node-action-limit"/>
+ <nvpair name="no-quorum-policy" value="ignore" id="cib-bootstrap-options-no-quorum-policy"/>
+ <nvpair name="stonith-enabled" value="false" id="cib-bootstrap-options-stonith-enabled"/>
+ <nvpair name="stonith-timeout" value="90" id="cib-bootstrap-options-stonith-timeout"/>
+ <nvpair name="last-lrm-refresh" value="1439556204" id="cib-bootstrap-options-last-lrm-refresh"/>
+ </cluster_property_set>
+ </crm_config>
+ <nodes>
+ <node uname="node2" id="node2">
+ <instance_attributes id="nodes-node2">
+ <nvpair id="nodes-node2-standby" name="standby" value="off"/>
+ </instance_attributes>
+ </node>
+ <node id="node3" uname="node3">
+ <instance_attributes id="nodes-node3">
+ <nvpair id="nodes-node3-standby" name="standby" value="off"/>
+ </instance_attributes>
+ </node>
+ </nodes>
+ <resources>
+ <primitive id="st-sbd" class="stonith" type="external/sbd"/>
+ <group id="dgroup">
+ <meta_attributes id="dgroup-meta_attributes">
+ <nvpair name="multiple-active" value="stop_unexpected" id="dgroup-meta_attributes-multiple-active"/>
+ </meta_attributes>
+ <primitive id="dummy" class="ocf" provider="heartbeat" type="DummyTimeout">
+ <operations>
+ <op name="monitor" interval="10s" timeout="20" id="dummy-monitor-10s"/>
+ <op name="start" timeout="300s" interval="0" id="dummy-start-0"/>
+ <op name="stop" timeout="20s" interval="0" id="dummy-stop-0"/>
+ </operations>
+ <meta_attributes id="dummy-meta_attributes">
+ <nvpair name="target-role" value="Started" id="dummy-meta_attributes-target-role"/>
+ </meta_attributes>
+ </primitive>
+ <primitive id="dummy2" class="ocf" provider="heartbeat" type="Dummy">
+ <operations>
+ <op name="monitor" interval="10s" timeout="20" id="dummy2-monitor-10s"/>
+ <op name="start" timeout="20s" interval="0" id="dummy2-start-0"/>
+ <op name="stop" timeout="20s" interval="0" id="dummy2-stop-0"/>
+ </operations>
+ <meta_attributes id="dummy2-meta_attributes">
+ <nvpair name="target-role" value="Started" id="dummy2-meta_attributes-target-role"/>
+ </meta_attributes>
+ </primitive>
+ <primitive id="dummy3" class="ocf" provider="heartbeat" type="Dummy">
+ <operations>
+ <op name="monitor" interval="10s" timeout="20" id="dummy3-monitor-10s"/>
+ <op name="start" timeout="20s" interval="0" id="dummy3-start-0"/>
+ <op name="stop" timeout="20s" interval="0" id="dummy3-stop-0"/>
+ </operations>
+ <meta_attributes id="dummy3-meta_attributes">
+ <nvpair name="target-role" value="Started" id="dummy3-meta_attributes-target-role"/>
+ </meta_attributes>
+ </primitive>
+ </group>
+ </resources>
+ <constraints/>
+ <op_defaults>
+ <meta_attributes id="op_defaults-options">
+ <nvpair id="op_defaults-options-record-pending" name="record-pending" value="false"/>
+ </meta_attributes>
+ </op_defaults>
+ <rsc_defaults>
+ <meta_attributes id="rsc_defaults-options">
+ <nvpair name="resource-stickiness" value="100" id="rsc_defaults-options-resource-stickiness"/>
+ <nvpair name="migration-threshold" value="100" id="rsc_defaults-options-migration-threshold"/>
+ </meta_attributes>
+ </rsc_defaults>
+ </configuration>
+ <status>
+ <node_state id="node2" uname="node2" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="node2">
+ <instance_attributes id="status-node2">
+ <nvpair id="status-node2-shutdown" name="shutdown" value="0"/>
+ <nvpair id="status-node2-probe_complete" name="probe_complete" value="true"/>
+ </instance_attributes>
+ </transient_attributes>
+ <lrm id="node2">
+ <lrm_resources>
+ <lrm_resource id="dummy3" type="Dummy" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy3_last_0" operation_key="dummy3_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="13:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;13:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="24" rc-code="0" op-status="0" interval="0" last-run="1440063239" last-rc-change="1440063239" exec-time="6" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="dummy3_monitor_10000" operation_key="dummy3_monitor_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="14:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;14:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="25" rc-code="0" op-status="0" interval="10000" last-rc-change="1440063239" exec-time="5" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
+ </lrm_resource>
+ <lrm_resource id="st-sbd" type="external/sbd" class="stonith">
+ <lrm_rsc_op id="st-sbd_last_0" operation_key="st-sbd_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.8" transition-key="10:6:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;10:6:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="27" rc-code="0" op-status="0" interval="0" last-run="1440064019" last-rc-change="1440064019" exec-time="1213" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ <lrm_resource id="dummy" type="DummyTimeout" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy_last_0" operation_key="dummy_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="9:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;9:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="20" rc-code="0" op-status="0" interval="0" last-run="1440063237" last-rc-change="1440063237" exec-time="1009" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="dummy_monitor_10000" operation_key="dummy_monitor_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="10:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;10:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="21" rc-code="0" op-status="0" interval="10000" last-rc-change="1440063238" exec-time="1010" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
+ </lrm_resource>
+ <lrm_resource id="dummy2" type="Dummy" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy2_last_0" operation_key="dummy2_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="11:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;11:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="22" rc-code="0" op-status="0" interval="0" last-run="1440063239" last-rc-change="1440063239" exec-time="5" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="dummy2_monitor_10000" operation_key="dummy2_monitor_10000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="12:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;12:1:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="23" rc-code="0" op-status="0" interval="10000" last-rc-change="1440063239" exec-time="5" queue-time="0" op-digest="4811cef7f7f94e3a35a70be7916cb2fd"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ <node_state id="node3" uname="node3" crmd="online" crm-debug-origin="do_update_resource" in_ccm="true" join="member" expected="member">
+ <transient_attributes id="node3">
+ <instance_attributes id="status-node3">
+ <nvpair id="status-node3-shutdown" name="shutdown" value="0"/>
+ <nvpair id="status-node3-probe_complete" name="probe_complete" value="true"/>
+ </instance_attributes>
+ </transient_attributes>
+ <lrm id="node3">
+ <lrm_resources>
+ <lrm_resource id="dummy3" type="Dummy" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy3_last_0" operation_key="dummy3_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="11:2:7:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:7;11:2:7:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="18" rc-code="7" op-status="0" interval="0" last-run="1440063820" last-rc-change="1440063820" exec-time="7" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ <lrm_resource id="st-sbd" type="external/sbd" class="stonith">
+ <lrm_rsc_op id="st-sbd_last_0" operation_key="st-sbd_stop_0" operation="stop" crm-debug-origin="do_update_resource" crm_feature_set="3.0.8" transition-key="9:6:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:0;9:6:0:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="21" rc-code="0" op-status="0" interval="0" last-run="1440064019" last-rc-change="1440064019" exec-time="1" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ <lrm_resource id="dummy" type="DummyTimeout" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy_last_0" operation_key="dummy_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.8" transition-key="5:0:0:a5e85e43-f35a-4f75-8e15-f0ddc8d81812" transition-magic="0:7;5:0:0:a5e85e43-f35a-4f75-8e15-f0ddc8d81812" call-id="20" rc-code="7" op-status="0" interval="0" last-run="1440063984" last-rc-change="1440063984" exec-time="179014" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="dummy_last_failure_0" operation_key="dummy_start_0" operation="start" crm-debug-origin="do_update_resource" crm_feature_set="3.0.8" transition-key="5:0:0:a5e85e43-f35a-4f75-8e15-f0ddc8d81812" transition-magic="0:7;5:0:0:a5e85e43-f35a-4f75-8e15-f0ddc8d81812" call-id="20" rc-code="7" op-status="0" interval="0" last-run="1440063984" last-rc-change="1440063984" exec-time="179014" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ <lrm_resource id="dummy2" type="Dummy" class="ocf" provider="heartbeat">
+ <lrm_rsc_op id="dummy2_last_0" operation_key="dummy2_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.0.8" transition-key="10:2:7:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" transition-magic="0:7;10:2:7:b78eae54-472e-4e90-a3c5-ec4b25a6d8cf" call-id="14" rc-code="7" op-status="0" interval="0" last-run="1440063820" last-rc-change="1440063820" exec-time="11" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ </status>
+</cib>
--
2.27.0

View File

@ -1,589 +0,0 @@
From 4a5dcc5210160f7d167bc68142635c1b5a6d4af2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 22 Apr 2022 10:47:29 -0500
Subject: [PATCH 1/3] Fix: scheduler: make multiple-active="stop_unexpected"
actually work
The previous implementation covered the scenario in the regression test and not
much else. It unnecessarily added an expected_node member to the native variant
data, when the resource's allocated_to is sufficient to know the expected node.
---
lib/pacemaker/pcmk_sched_native.c | 45 +++++++++++++++----------------
lib/pengine/unpack.c | 1 -
2 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index c0224849f..a1a51721e 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1250,7 +1250,7 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
gboolean need_stop = FALSE;
bool need_promote = FALSE;
gboolean is_moving = FALSE;
- gboolean allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE;
+ gboolean allow_migrate = FALSE;
GList *gIter = NULL;
unsigned int num_all_active = 0;
@@ -1259,9 +1259,8 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
- native_variant_data_t *native_data = NULL;
-
- get_native_variant_data(native_data, rsc);
+ CRM_ASSERT(rsc != NULL);
+ allow_migrate = pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)? TRUE : FALSE;
chosen = rsc->allocated_to;
next_role = rsc->next_role;
@@ -1338,8 +1337,16 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information");
}
- if (rsc->recovery_type == recovery_stop_start) {
- need_stop = TRUE;
+ switch (rsc->recovery_type) {
+ case recovery_stop_start:
+ need_stop = TRUE;
+ break;
+ case recovery_stop_unexpected:
+ need_stop = TRUE; // StopRsc() will skip expected node
+ pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
+ break;
+ default:
+ break;
}
/* If by chance a partial migration is in process, but the migration
@@ -1350,7 +1357,6 @@ native_create_actions(pe_resource_t * rsc, pe_working_set_t * data_set)
}
if (!multiply_active) {
- native_data->expected_node = NULL;
pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
}
@@ -2020,14 +2026,11 @@ native_expand(pe_resource_t * rsc, pe_working_set_t * data_set)
static bool
is_expected_node(const pe_resource_t *rsc, const pe_node_t *node)
{
- native_variant_data_t *native_data = NULL;
-
- get_native_variant_data(native_data, rsc);
return pcmk_all_flags_set(rsc->flags,
pe_rsc_stop_unexpected|pe_rsc_restarting)
&& (rsc->next_role > RSC_ROLE_STOPPED)
- && (native_data->expected_node != NULL) && (node != NULL)
- && (native_data->expected_node->details == node->details);
+ && (rsc->allocated_to != NULL) && (node != NULL)
+ && (rsc->allocated_to->details == node->details);
}
gboolean
@@ -2076,17 +2079,13 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
if(rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", TRUE);
- } else if (pcmk_is_set(rsc->flags, pe_rsc_restarting)) {
- native_variant_data_t *native_data = NULL;
-
- get_native_variant_data(native_data, rsc);
- if (native_data->expected_node != NULL) {
- /* We are stopping a multiply active resource on a node that is
- * not its expected node, and we are still scheduling restart
- * actions, so the stop is for being multiply active.
- */
- pe_action_set_reason(stop, "being multiply active", TRUE);
- }
+ } else if (pcmk_all_flags_set(rsc->flags, pe_rsc_restarting
+ |pe_rsc_stop_unexpected)) {
+ /* We are stopping a multiply active resource on a node that is
+ * not its expected node, and we are still scheduling restart
+ * actions, so the stop is for being multiply active.
+ */
+ pe_action_set_reason(stop, "being multiply active", TRUE);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 17dea0d7a..426022013 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -3945,7 +3945,6 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
}
done:
- pe__update_expected_node(rsc, node, status, rc, target_rc);
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
rsc->id, task, role2text(rsc->role),
role2text(rsc->next_role));
--
2.27.0
From 703d3a09bce389afb4e095e1ac7af29eb5edd189 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 22 Apr 2022 14:02:34 -0500
Subject: [PATCH 2/3] Test: scheduler: add a second regression test for
multiple-active=stop_unexpected
---
cts/cts-scheduler.in | 3 +-
cts/scheduler/dot/stop-unexpected-2.dot | 7 +
cts/scheduler/exp/stop-unexpected-2.exp | 36 ++++
cts/scheduler/scores/stop-unexpected-2.scores | 21 ++
.../summary/stop-unexpected-2.summary | 29 +++
cts/scheduler/xml/stop-unexpected-2.xml | 204 ++++++++++++++++++
6 files changed, 299 insertions(+), 1 deletion(-)
create mode 100644 cts/scheduler/dot/stop-unexpected-2.dot
create mode 100644 cts/scheduler/exp/stop-unexpected-2.exp
create mode 100644 cts/scheduler/scores/stop-unexpected-2.scores
create mode 100644 cts/scheduler/summary/stop-unexpected-2.summary
create mode 100644 cts/scheduler/xml/stop-unexpected-2.xml
diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index 8c04687da..7fc76cce4 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -273,8 +273,9 @@ TESTS = [
[ "rec-rsc-6", "Resource Recover - multiple - restart" ],
[ "rec-rsc-7", "Resource Recover - multiple - stop" ],
[ "rec-rsc-8", "Resource Recover - multiple - block" ],
- [ "stop-unexpected", "Resource Recover - multiple - stop unexpected" ],
[ "rec-rsc-9", "Resource Recover - group/group" ],
+ [ "stop-unexpected", "Recover multiply active group with stop_unexpected" ],
+ [ "stop-unexpected-2", "Resource multiply active primitve with stop_unexpected" ],
[ "monitor-recovery", "on-fail=block + resource recovery detected by recurring monitor" ],
[ "stop-failure-no-quorum", "Stop failure without quorum" ],
[ "stop-failure-no-fencing", "Stop failure without fencing available" ],
diff --git a/cts/scheduler/dot/stop-unexpected-2.dot b/cts/scheduler/dot/stop-unexpected-2.dot
new file mode 100644
index 000000000..cdaebf551
--- /dev/null
+++ b/cts/scheduler/dot/stop-unexpected-2.dot
@@ -0,0 +1,7 @@
+ digraph "g" {
+"test_monitor_10000 rhel8-4" [ style=bold color="green" fontcolor="black"]
+"test_start_0 rhel8-4" -> "test_monitor_10000 rhel8-4" [ style = bold]
+"test_start_0 rhel8-4" [ style=bold color="green" fontcolor="orange"]
+"test_stop_0 rhel8-3" -> "test_start_0 rhel8-4" [ style = bold]
+"test_stop_0 rhel8-3" [ style=bold color="green" fontcolor="black"]
+}
diff --git a/cts/scheduler/exp/stop-unexpected-2.exp b/cts/scheduler/exp/stop-unexpected-2.exp
new file mode 100644
index 000000000..258053c08
--- /dev/null
+++ b/cts/scheduler/exp/stop-unexpected-2.exp
@@ -0,0 +1,36 @@
+<transition_graph cluster-delay="60s" stonith-timeout="60s" failed-stop-offset="INFINITY" failed-start-offset="1" transition_id="0">
+ <synapse id="0">
+ <action_set>
+ <rsc_op id="10" operation="monitor" operation_key="test_monitor_10000" on_node="rhel8-4" on_node_uuid="4">
+ <primitive id="test" class="ocf" provider="pacemaker" type="Dummy"/>
+ <attributes CRM_meta_interval="10000" CRM_meta_name="monitor" CRM_meta_on_node="rhel8-4" CRM_meta_on_node_uuid="4" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs>
+ <trigger>
+ <pseudo_event id="9" operation="start" operation_key="test_start_0"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="1">
+ <action_set>
+ <pseudo_event id="9" operation="start" operation_key="test_start_0">
+ <attributes CRM_meta_name="start" CRM_meta_timeout="20000" />
+ </pseudo_event>
+ </action_set>
+ <inputs>
+ <trigger>
+ <rsc_op id="8" operation="stop" operation_key="test_stop_0" on_node="rhel8-3" on_node_uuid="3"/>
+ </trigger>
+ </inputs>
+ </synapse>
+ <synapse id="2">
+ <action_set>
+ <rsc_op id="8" operation="stop" operation_key="test_stop_0" on_node="rhel8-3" on_node_uuid="3">
+ <primitive id="test" class="ocf" provider="pacemaker" type="Dummy"/>
+ <attributes CRM_meta_name="stop" CRM_meta_on_node="rhel8-3" CRM_meta_on_node_uuid="3" CRM_meta_timeout="20000" />
+ </rsc_op>
+ </action_set>
+ <inputs/>
+ </synapse>
+</transition_graph>
diff --git a/cts/scheduler/scores/stop-unexpected-2.scores b/cts/scheduler/scores/stop-unexpected-2.scores
new file mode 100644
index 000000000..0eb549f5e
--- /dev/null
+++ b/cts/scheduler/scores/stop-unexpected-2.scores
@@ -0,0 +1,21 @@
+
+pcmk__native_allocate: Fencing allocation score on rhel8-1: 0
+pcmk__native_allocate: Fencing allocation score on rhel8-2: 0
+pcmk__native_allocate: Fencing allocation score on rhel8-3: 0
+pcmk__native_allocate: Fencing allocation score on rhel8-4: 0
+pcmk__native_allocate: Fencing allocation score on rhel8-5: 0
+pcmk__native_allocate: FencingFail allocation score on rhel8-1: 0
+pcmk__native_allocate: FencingFail allocation score on rhel8-2: 0
+pcmk__native_allocate: FencingFail allocation score on rhel8-3: 0
+pcmk__native_allocate: FencingFail allocation score on rhel8-4: 0
+pcmk__native_allocate: FencingFail allocation score on rhel8-5: 0
+pcmk__native_allocate: FencingPass allocation score on rhel8-1: 0
+pcmk__native_allocate: FencingPass allocation score on rhel8-2: 0
+pcmk__native_allocate: FencingPass allocation score on rhel8-3: 0
+pcmk__native_allocate: FencingPass allocation score on rhel8-4: 0
+pcmk__native_allocate: FencingPass allocation score on rhel8-5: 0
+pcmk__native_allocate: test allocation score on rhel8-1: 0
+pcmk__native_allocate: test allocation score on rhel8-2: 0
+pcmk__native_allocate: test allocation score on rhel8-3: 0
+pcmk__native_allocate: test allocation score on rhel8-4: 0
+pcmk__native_allocate: test allocation score on rhel8-5: 0
diff --git a/cts/scheduler/summary/stop-unexpected-2.summary b/cts/scheduler/summary/stop-unexpected-2.summary
new file mode 100644
index 000000000..d6b0c15dc
--- /dev/null
+++ b/cts/scheduler/summary/stop-unexpected-2.summary
@@ -0,0 +1,29 @@
+Using the original execution date of: 2022-04-22 14:15:37Z
+Current cluster status:
+ * Node List:
+ * Online: [ rhel8-1 rhel8-2 rhel8-3 rhel8-4 rhel8-5 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started rhel8-1
+ * FencingPass (stonith:fence_dummy): Started rhel8-2
+ * FencingFail (stonith:fence_dummy): Started rhel8-3
+ * test (ocf:pacemaker:Dummy): Started [ rhel8-4 rhel8-3 ]
+
+Transition Summary:
+ * Restart test ( rhel8-4 )
+
+Executing Cluster Transition:
+ * Resource action: test stop on rhel8-3
+ * Pseudo action: test_start_0
+ * Resource action: test monitor=10000 on rhel8-4
+Using the original execution date of: 2022-04-22 14:15:37Z
+
+Revised Cluster Status:
+ * Node List:
+ * Online: [ rhel8-1 rhel8-2 rhel8-3 rhel8-4 rhel8-5 ]
+
+ * Full List of Resources:
+ * Fencing (stonith:fence_xvm): Started rhel8-1
+ * FencingPass (stonith:fence_dummy): Started rhel8-2
+ * FencingFail (stonith:fence_dummy): Started rhel8-3
+ * test (ocf:pacemaker:Dummy): Started rhel8-4
diff --git a/cts/scheduler/xml/stop-unexpected-2.xml b/cts/scheduler/xml/stop-unexpected-2.xml
new file mode 100644
index 000000000..e103629e9
--- /dev/null
+++ b/cts/scheduler/xml/stop-unexpected-2.xml
@@ -0,0 +1,204 @@
+<cib crm_feature_set="3.13.0" validate-with="pacemaker-3.7" epoch="59" num_updates="14" admin_epoch="0" cib-last-written="Fri Apr 22 09:15:36 2022" update-origin="rhel8-1" update-client="crmd" update-user="hacluster" have-quorum="1" dc-uuid="2" execution-date="1650636937">
+ <configuration>
+ <crm_config>
+ <cluster_property_set id="cib-bootstrap-options">
+ <nvpair id="cts-stonith-enabled" name="stonith-enabled" value="1"/>
+ <nvpair id="cts-start-failure-is-fatal" name="start-failure-is-fatal" value="false"/>
+ <nvpair id="cts-pe-input-series-max" name="pe-input-series-max" value="5000"/>
+ <nvpair id="cts-shutdown-escalation" name="shutdown-escalation" value="5min"/>
+ <nvpair id="cts-batch-limit" name="batch-limit" value="10"/>
+ <nvpair id="cts-dc-deadtime" name="dc-deadtime" value="5s"/>
+ <nvpair id="cts-no-quorum-policy" name="no-quorum-policy" value="stop"/>
+ <nvpair id="cib-bootstrap-options-have-watchdog" name="have-watchdog" value="false"/>
+ <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.1.2-4.el8_6.1-ada5c3b36e2"/>
+ <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="corosync"/>
+ <nvpair id="cib-bootstrap-options-cluster-name" name="cluster-name" value="rhel8-lab"/>
+ <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1650636936"/>
+ </cluster_property_set>
+ </crm_config>
+ <nodes>
+ <node id="1" uname="rhel8-1"/>
+ <node id="3" uname="rhel8-3"/>
+ <node id="4" uname="rhel8-4"/>
+ <node id="5" uname="rhel8-5"/>
+ <node id="2" uname="rhel8-2"/>
+ </nodes>
+ <resources>
+ <primitive class="stonith" id="Fencing" type="fence_xvm">
+ <meta_attributes id="Fencing-meta">
+ <nvpair id="Fencing-migration-threshold" name="migration-threshold" value="5"/>
+ </meta_attributes>
+ <instance_attributes id="Fencing-params">
+ <nvpair id="Fencing-pcmk_host_map" name="pcmk_host_map" value="remote-rhel8-1:rhel8-1;remote-rhel8-2:rhel8-2;remote-rhel8-3:rhel8-3;remote-rhel8-4:rhel8-4;remote-rhel8-5:rhel8-5;"/>
+ <nvpair id="Fencing-key_file" name="key_file" value="/etc/pacemaker/fence_xvm.key"/>
+ <nvpair id="Fencing-multicast_address" name="multicast_address" value="239.255.100.100"/>
+ <nvpair id="Fencing-pcmk_host_list" name="pcmk_host_list" value="rhel8-1 remote-rhel8-1 rhel8-2 remote-rhel8-2 rhel8-3 remote-rhel8-3 rhel8-4 remote-rhel8-4 rhel8-5 remote-rhel8-5"/>
+ </instance_attributes>
+ <operations>
+ <op id="Fencing-monitor-120s" interval="120s" name="monitor" timeout="120s"/>
+ <op id="Fencing-stop-0" interval="0" name="stop" timeout="60s"/>
+ <op id="Fencing-start-0" interval="0" name="start" timeout="60s"/>
+ </operations>
+ </primitive>
+ <primitive class="stonith" id="FencingPass" type="fence_dummy">
+ <instance_attributes id="FencingPass-params">
+ <nvpair id="FencingPass-pcmk_host_list" name="pcmk_host_list" value="rhel8-4 remote-rhel8-4 rhel8-5 remote-rhel8-5"/>
+ <nvpair id="FencingPass-random_sleep_range" name="random_sleep_range" value="30"/>
+ <nvpair id="FencingPass-mode" name="mode" value="pass"/>
+ </instance_attributes>
+ </primitive>
+ <primitive class="stonith" id="FencingFail" type="fence_dummy">
+ <instance_attributes id="FencingFail-params">
+ <nvpair id="FencingFail-pcmk_host_list" name="pcmk_host_list" value="rhel8-2 remote-rhel8-2"/>
+ <nvpair id="FencingFail-random_sleep_range" name="random_sleep_range" value="30"/>
+ <nvpair id="FencingFail-mode" name="mode" value="fail"/>
+ </instance_attributes>
+ </primitive>
+ <primitive class="ocf" id="test" provider="pacemaker" type="Dummy">
+ <meta_attributes id="test-meta_attributes">
+ <nvpair id="test-meta_attributes-multiple-active" name="multiple-active" value="stop_unexpected"/>
+ </meta_attributes>
+ <operations>
+ <op id="test-migrate_from-interval-0s" interval="0s" name="migrate_from" timeout="20s"/>
+ <op id="test-migrate_to-interval-0s" interval="0s" name="migrate_to" timeout="20s"/>
+ <op id="test-monitor-interval-10s" interval="10s" name="monitor" timeout="20s"/>
+ <op id="test-reload-interval-0s" interval="0s" name="reload" timeout="20s"/>
+ <op id="test-reload-agent-interval-0s" interval="0s" name="reload-agent" timeout="20s"/>
+ <op id="test-start-interval-0s" interval="0s" name="start" timeout="20s"/>
+ <op id="test-stop-interval-0s" interval="0s" name="stop" timeout="20s"/>
+ </operations>
+ </primitive>
+ </resources>
+ <constraints/>
+ <fencing-topology>
+ <fencing-level devices="FencingFail" id="cts-rhel8-2.1" index="1" target="rhel8-2"/>
+ <fencing-level devices="Fencing" id="cts-rhel8-2.2" index="2" target="rhel8-2"/>
+ <fencing-level devices="FencingFail" id="cts-remote-rhel8-2.1" index="1" target="remote-rhel8-2"/>
+ <fencing-level devices="Fencing" id="cts-remote-rhel8-2.2" index="2" target="remote-rhel8-2"/>
+ <fencing-level devices="FencingPass,Fencing" id="cts-rhel8-4.1" index="1" target="rhel8-4"/>
+ <fencing-level devices="FencingPass,Fencing" id="cts-remote-rhel8-4.1" index="1" target="remote-rhel8-4"/>
+ <fencing-level devices="FencingPass,Fencing" id="cts-rhel8-5.1" index="1" target="rhel8-5"/>
+ <fencing-level devices="FencingPass,Fencing" id="cts-remote-rhel8-5.1" index="1" target="remote-rhel8-5"/>
+ </fencing-topology>
+ <op_defaults>
+ <meta_attributes id="cts-op_defaults-meta">
+ <nvpair id="cts-op_defaults-timeout" name="timeout" value="90s"/>
+ </meta_attributes>
+ </op_defaults>
+ <alerts>
+ <alert id="alert-1" path="/var/lib/pacemaker/notify.sh">
+ <recipient id="alert-1-recipient-1" value="/run/crm/alert.log"/>
+ </alert>
+ </alerts>
+ </configuration>
+ <status>
+ <node_state id="4" uname="rhel8-4" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="4">
+ <instance_attributes id="status-4"/>
+ </transient_attributes>
+ <lrm id="4">
+ <lrm_resources>
+ <lrm_resource id="FencingPass" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingPass_last_0" operation_key="FencingPass_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="47:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;47:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-4" call-id="13" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="bdca24cab6ded2b426c6b31df675bf0b"/>
+ </lrm_resource>
+ <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
+ <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="46:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;46:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-4" call-id="9" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="9" queue-time="0" op-digest="bf974d77f2d4d33e434be1f89e362a52"/>
+ </lrm_resource>
+ <lrm_resource id="FencingFail" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingFail_last_0" operation_key="FencingFail_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="48:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;48:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-4" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="fe7e3f8acdd3228efda2766a0eea7ba5"/>
+ </lrm_resource>
+ <lrm_resource id="test" type="Dummy" class="ocf" provider="pacemaker">
+ <lrm_rsc_op id="test_last_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="5:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;5:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-4" call-id="130" rc-code="0" op-status="0" interval="0" last-rc-change="1650636936" exec-time="25" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-secure-params=" passwd " op-secure-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="test_last_failure_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="5:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;5:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-4" call-id="130" rc-code="0" op-status="0" interval="0" last-rc-change="1650636936" exec-time="25" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ <node_state id="5" uname="rhel8-5" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="5">
+ <instance_attributes id="status-5"/>
+ </transient_attributes>
+ <lrm id="5">
+ <lrm_resources>
+ <lrm_resource id="FencingPass" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingPass_last_0" operation_key="FencingPass_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="62:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;62:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-5" call-id="13" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="bdca24cab6ded2b426c6b31df675bf0b"/>
+ </lrm_resource>
+ <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
+ <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="61:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;61:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-5" call-id="9" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="6" queue-time="0" op-digest="bf974d77f2d4d33e434be1f89e362a52"/>
+ </lrm_resource>
+ <lrm_resource id="FencingFail" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingFail_last_0" operation_key="FencingFail_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="63:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;63:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-5" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="fe7e3f8acdd3228efda2766a0eea7ba5"/>
+ </lrm_resource>
+ <lrm_resource id="test" type="Dummy" class="ocf" provider="pacemaker">
+ <lrm_rsc_op id="test_last_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="6:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;6:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-5" call-id="113" rc-code="7" op-status="0" interval="0" last-rc-change="1650636936" exec-time="25" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-secure-params=" passwd " op-secure-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ <node_state id="1" uname="rhel8-1" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="1">
+ <instance_attributes id="status-1"/>
+ </transient_attributes>
+ <lrm id="1">
+ <lrm_resources>
+ <lrm_resource id="FencingPass" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingPass_last_0" operation_key="FencingPass_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="2:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;2:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-1" call-id="13" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="bdca24cab6ded2b426c6b31df675bf0b"/>
+ </lrm_resource>
+ <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
+ <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="76:0:0:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;76:0:0:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-1" call-id="14" rc-code="0" op-status="0" interval="0" last-rc-change="1650636745" exec-time="36" queue-time="0" op-digest="bf974d77f2d4d33e434be1f89e362a52"/>
+ <lrm_rsc_op id="Fencing_monitor_120000" operation_key="Fencing_monitor_120000" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="77:0:0:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;77:0:0:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-1" call-id="16" rc-code="0" op-status="0" interval="120000" last-rc-change="1650636745" exec-time="36" queue-time="0" op-digest="24c9c9364f847dcb857d6fb4e1b4d3c8"/>
+ </lrm_resource>
+ <lrm_resource id="FencingFail" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingFail_last_0" operation_key="FencingFail_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="3:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;3:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-1" call-id="21" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="fe7e3f8acdd3228efda2766a0eea7ba5"/>
+ </lrm_resource>
+ <lrm_resource id="test" type="Dummy" class="ocf" provider="pacemaker">
+ <lrm_rsc_op id="test_last_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="2:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;2:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-1" call-id="121" rc-code="7" op-status="0" interval="0" last-rc-change="1650636936" exec-time="62" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-secure-params=" passwd " op-secure-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ <node_state id="2" uname="rhel8-2" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="2">
+ <instance_attributes id="status-2"/>
+ </transient_attributes>
+ <lrm id="2">
+ <lrm_resources>
+ <lrm_resource id="FencingPass" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingPass_last_0" operation_key="FencingPass_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="78:0:0:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;78:0:0:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-2" call-id="18" rc-code="0" op-status="0" interval="0" last-rc-change="1650636745" exec-time="21041" queue-time="0" op-digest="bdca24cab6ded2b426c6b31df675bf0b"/>
+ </lrm_resource>
+ <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
+ <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="16:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;16:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-2" call-id="9" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="2" queue-time="0" op-digest="bf974d77f2d4d33e434be1f89e362a52"/>
+ </lrm_resource>
+ <lrm_resource id="FencingFail" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingFail_last_0" operation_key="FencingFail_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="18:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;18:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-2" call-id="17" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="fe7e3f8acdd3228efda2766a0eea7ba5"/>
+ </lrm_resource>
+ <lrm_resource id="test" type="Dummy" class="ocf" provider="pacemaker">
+ <lrm_rsc_op id="test_last_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="3:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;3:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-2" call-id="109" rc-code="7" op-status="0" interval="0" last-rc-change="1650636936" exec-time="37" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-secure-params=" passwd " op-secure-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ <node_state id="3" uname="rhel8-3" in_ccm="true" crmd="online" crm-debug-origin="do_update_resource" join="member" expected="member">
+ <transient_attributes id="3">
+ <instance_attributes id="status-3"/>
+ </transient_attributes>
+ <lrm id="3">
+ <lrm_resources>
+ <lrm_resource id="FencingPass" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingPass_last_0" operation_key="FencingPass_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="32:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;32:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-3" call-id="13" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="0" queue-time="0" op-digest="bdca24cab6ded2b426c6b31df675bf0b"/>
+ </lrm_resource>
+ <lrm_resource id="Fencing" type="fence_xvm" class="stonith">
+ <lrm_rsc_op id="Fencing_last_0" operation_key="Fencing_monitor_0" operation="monitor" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="31:0:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:7;31:0:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-3" call-id="9" rc-code="7" op-status="0" interval="0" last-rc-change="1650636745" exec-time="2" queue-time="0" op-digest="bf974d77f2d4d33e434be1f89e362a52"/>
+ </lrm_resource>
+ <lrm_resource id="FencingFail" type="fence_dummy" class="stonith">
+ <lrm_rsc_op id="FencingFail_last_0" operation_key="FencingFail_start_0" operation="start" crm-debug-origin="build_active_RAs" crm_feature_set="3.13.0" transition-key="79:0:0:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;79:0:0:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-3" call-id="26" rc-code="0" op-status="0" interval="0" last-rc-change="1650636745" exec-time="1044" queue-time="0" op-digest="fe7e3f8acdd3228efda2766a0eea7ba5"/>
+ </lrm_resource>
+ <lrm_resource id="test" type="Dummy" class="ocf" provider="pacemaker">
+ <lrm_rsc_op id="test_last_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="4:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;4:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-3" call-id="114" rc-code="0" op-status="0" interval="0" last-rc-change="1650636936" exec-time="24" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-force-restart=" state " op-restart-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8" op-secure-params=" passwd " op-secure-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ <lrm_rsc_op id="test_last_failure_0" operation_key="test_monitor_0" operation="monitor" crm-debug-origin="do_update_resource" crm_feature_set="3.13.0" transition-key="4:46:7:181f4609-25b9-4673-b760-5de7a7f55635" transition-magic="0:0;4:46:7:181f4609-25b9-4673-b760-5de7a7f55635" exit-reason="" on_node="rhel8-3" call-id="114" rc-code="0" op-status="0" interval="0" last-rc-change="1650636936" exec-time="24" queue-time="0" op-digest="f2317cad3d54cec5d7d7aa7d0bf35cf8"/>
+ </lrm_resource>
+ </lrm_resources>
+ </lrm>
+ </node_state>
+ </status>
+</cib>
--
2.27.0
From 60d8bb01ba73dfd1cb25c6764ee2b923dcfc4e8c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 22 Apr 2022 14:09:43 -0500
Subject: [PATCH 3/3] Revert "Refactor: scheduler: add expected node to
primitive variant data"
This reverts commit 871e2201d92520039df45062afc9120fd1fb0f30.
---
include/crm/pengine/internal.h | 4 ----
lib/pengine/native.c | 38 ----------------------------------
lib/pengine/variant.h | 8 ++-----
3 files changed, 2 insertions(+), 48 deletions(-)
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
index a2e4b5bf7..fe9a23b7e 100644
--- a/include/crm/pengine/internal.h
+++ b/include/crm/pengine/internal.h
@@ -580,8 +580,4 @@ xmlNode *pe__failed_probe_for_rsc(pe_resource_t *rsc, const char *name);
const char *pe__clone_child_id(pe_resource_t *rsc);
-void pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
- int execution_status, int exit_status,
- int expected_exit_status);
-
#endif
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index 591d1c6f5..fa7dc8960 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1376,41 +1376,3 @@ pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_paren
return TRUE;
}
-
-/*!
- * \internal
- * \brief Set a resource's expected node if appropriate for a history result
- *
- * \param[in] rsc Resource to set expected node for
- * \param[in] node Node to set as expected node
- * \param[in] execution_status History entry's execution status
- * \param[in] exit_status History entry's actual exit status
- * \param[in] expected_status History entry's expected exit status
- */
-void
-pe__update_expected_node(pe_resource_t *rsc, pe_node_t *node,
- int execution_status, int exit_status,
- int expected_exit_status)
-{
- native_variant_data_t *native_data = NULL;
-
- get_native_variant_data(native_data, rsc);
-
- if ((rsc->recovery_type == recovery_stop_unexpected)
- && (rsc->role > RSC_ROLE_STOPPED)
- && (execution_status == PCMK_EXEC_DONE)
- && (exit_status == expected_exit_status)) {
- // Resource is active and was expected on this node
- pe_rsc_trace(rsc, "Found expected node %s for %s",
- node->details->uname, rsc->id);
- native_data->expected_node = node;
- pe__set_resource_flags(rsc, pe_rsc_stop_unexpected);
-
- } else if ((native_data->expected_node != NULL)
- && (native_data->expected_node->details == node->details)) {
- // Resource is not cleanly active here
- pe_rsc_trace(rsc, "Clearing expected node for %s", rsc->id);
- native_data->expected_node = NULL;
- pe__clear_resource_flags(rsc, pe_rsc_stop_unexpected);
- }
-}
diff --git a/lib/pengine/variant.h b/lib/pengine/variant.h
index d8fefa9d6..cabfbe81f 100644
--- a/lib/pengine/variant.h
+++ b/lib/pengine/variant.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2022 the Pacemaker project contributors
+ * Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -139,11 +139,7 @@ typedef struct group_variant_data_s {
# elif VARIANT_NATIVE
typedef struct native_variant_data_s {
- /* If the resource is multiply active, and has multiple-active set to
- * stop_unexpected, this will be set to the node where the resource was
- * found active by an operation with a expected result.
- */
- pe_node_t *expected_node;
+ int dummy;
} native_variant_data_t;
# define get_native_variant_data(data, rsc) \
--
2.27.0

View File

@ -35,11 +35,11 @@
## Upstream pacemaker version, and its package version (specversion
## can be incremented to build packages reliably considered "newer"
## than previously built packages with the same pcmkversion)
%global pcmkversion 2.1.2
%global specversion 4
%global pcmkversion 2.1.3
%global specversion 1
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
%global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175
%global commit dff7c3a7265f02e37804d6302dd1bf1f4e4c1f17
## Since git v2.11, the extent of abbreviation is autoscaled by default
## (used to be constant of 7), so we need to convey it for non-tags, too.
@ -63,14 +63,13 @@
## Add option for whether to support storing sensitive information outside CIB
%bcond_without cibsecrets
## Add option to enable Native Language Support (experimental)
%bcond_with nls
## Add option to create binaries suitable for use with profiling tools
%bcond_with profiling
## Add option to create binaries with coverage analysis
%bcond_with coverage
## Add option to skip (or enable, on RHEL) generating documentation
## (the build tools aren't available everywhere)
## Allow deprecated option to skip (or enable, on RHEL) documentation
%if 0%{?rhel}
%bcond_with doc
%else
@ -159,6 +158,7 @@
%if 0%{?suse_version} > 0
%global pkgname_bzip2_devel libbz2-devel
%global pkgname_docbook_xsl docbook-xsl-stylesheets
%global pkgname_gettext gettext-tools
%global pkgname_gnutls_devel libgnutls-devel
%global pkgname_shadow_utils shadow
%global pkgname_procps procps
@ -170,6 +170,7 @@
%global pkgname_libtool_devel_arch libtool-ltdl-devel%{?_isa}
%global pkgname_bzip2_devel bzip2-devel
%global pkgname_docbook_xsl docbook-style-xsl
%global pkgname_gettext gettext-devel
%global pkgname_gnutls_devel gnutls-devel
%global pkgname_shadow_utils shadow-utils
%global pkgname_procps procps-ng
@ -242,7 +243,7 @@
Name: pacemaker
Summary: Scalable High-Availability cluster resource manager
Version: %{pcmkversion}
Release: %{pcmk_release}%{?dist}.2
Release: %{pcmk_release}%{?dist}
%if %{defined _unitdir}
License: GPLv2+ and LGPLv2+
%else
@ -263,32 +264,7 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch
Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz
# upstream commits
Patch1: 001-acl-group-schema.patch
Patch2: 002-fencing-reasons.patch
Patch3: 003-fencing-reasons.patch
Patch4: 004-systemd-metadata.patch
Patch5: 005-fencing-reasons.patch
Patch6: 006-stateful-metadata.patch
Patch7: 007-memory-leak.patch
Patch8: 008-fencing-history.patch
Patch9: 009-fencing-reasons.patch
Patch10: 010-probe-failures.patch
Patch11: 011-fencing-reasons.patch
Patch12: 012-notify-crash.patch
Patch13: 013-probe-failures.patch
Patch14: 014-pcmk_delay_base.patch
Patch15: 015-fencing-reasons.patch
Patch16: 016-fencing-crash.patch
Patch17: 017-fencing-reasons.patch
Patch18: 018-failure-messages.patch
Patch19: 019-corosync-tracking.patch
Patch20: 020-systemd-unit.patch
Patch21: 021-failure-messages.patch
Patch22: 022-memory-leak.patch
Patch23: 023-regression.patch
Patch24: 024-stop_unexpected.patch
Patch25: 025-stop_unexpected-test.patch
Patch26: 026-stop_unexpected-fix.patch
#Patch001: 001-xxxx.patch
# downstream-only commits
#Patch1xx: 1xx-xxxx.patch
@ -347,6 +323,7 @@ BuildRequires: %{pkgname_gnutls_devel}
BuildRequires: help2man
BuildRequires: ncurses-devel
BuildRequires: pam-devel
BuildRequires: %{pkgname_gettext} >= 0.18
# Required for "make check"
BuildRequires: libcmocka-devel
@ -395,7 +372,7 @@ when related resources fail and can be configured to periodically check
resource health.
Available rpmbuild rebuild options:
--with(out) : cibsecrets coverage doc hardening pre_release profiling stonithd
--with(out) : cibsecrets doc hardening nls pre_release profiling stonithd
%package cli
License: GPLv2+ and LGPLv2+
@ -584,8 +561,8 @@ export LDFLAGS_HARDENED_LIB="%{?_hardening_ldflags}"
%{!?with_hardening: --disable-hardening} \
%{?with_legacy_links: --enable-legacy-links} \
%{?with_profiling: --with-profiling} \
%{?with_coverage: --with-coverage} \
%{?with_cibsecrets: --with-cibsecrets} \
%{?with_nls: --enable-nls} \
%{?with_sbd_sync: --with-sbd-sync-default="true"} \
%{?gnutls_priorities: --with-gnutls-priorities="%{gnutls_priorities}"} \
%{?bug_url: --with-bug-url=%{bug_url}} \
@ -644,10 +621,14 @@ done
mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/lib/rpm-state/%{name}
%endif
%if %{with nls}
%find_lang %{name}
%endif
# Don't package libtool archives
find %{buildroot} -name '*.la' -type f -print0 | xargs -0 rm -f
# Do not package these either
# Do not package these either on RHEL
rm -f %{buildroot}/%{_sbindir}/fence_legacy
rm -f %{buildroot}/%{_mandir}/man8/fence_legacy.*
find %{buildroot} -name '*o2cb*' -type f -print0 | xargs -0 rm -f
@ -666,16 +647,6 @@ rm -f %{buildroot}/%{_sbindir}/ipmiservicelogd
%endif
%endif
%if %{with coverage}
GCOV_BASE=%{buildroot}/%{_var}/lib/pacemaker/gcov
mkdir -p $GCOV_BASE
find . -name '*.gcno' -type f | while read F ; do
D=`dirname $F`
mkdir -p ${GCOV_BASE}/$D
cp $F ${GCOV_BASE}/$D
done
%endif
%post
%if %{defined _unitdir}
%systemd_post pacemaker.service
@ -808,7 +779,6 @@ exit 0
%exclude %{_datadir}/pacemaker/nagios
%{_libexecdir}/pacemaker/*
%{_sbindir}/crm_attribute
%{_sbindir}/crm_master
%{_sbindir}/fence_watchdog
@ -817,7 +787,6 @@ exit 0
%doc %{_mandir}/man7/pacemaker-fenced.*
%doc %{_mandir}/man7/ocf_pacemaker_controld.*
%doc %{_mandir}/man7/ocf_pacemaker_remote.*
%doc %{_mandir}/man8/crm_attribute.*
%doc %{_mandir}/man8/crm_master.*
%doc %{_mandir}/man8/fence_watchdog.*
%doc %{_mandir}/man8/pacemakerd.*
@ -856,6 +825,7 @@ exit 0
%if %{with cibsecrets}
%{_sbindir}/cibsecret
%endif
%{_sbindir}/crm_attribute
%{_sbindir}/crm_diff
%{_sbindir}/crm_error
%{_sbindir}/crm_failcount
@ -892,7 +862,6 @@ exit 0
%exclude %{_mandir}/man7/ocf_pacemaker_controld.*
%exclude %{_mandir}/man7/ocf_pacemaker_remote.*
%doc %{_mandir}/man8/*
%exclude %{_mandir}/man8/crm_attribute.*
%exclude %{_mandir}/man8/crm_master.*
%exclude %{_mandir}/man8/fence_watchdog.*
%exclude %{_mandir}/man8/pacemakerd.*
@ -908,7 +877,7 @@ exit 0
%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker
%dir %attr (770, %{uname}, %{gname}) %{_var}/log/pacemaker/bundles
%files -n %{pkgname_pcmk_libs}
%files -n %{pkgname_pcmk_libs} %{?with_nls:-f %{name}.lang}
%{_libdir}/libcib.so.*
%{_libdir}/liblrmd.so.*
%{_libdir}/libcrmservice.so.*
@ -964,9 +933,6 @@ exit 0
%files -n %{pkgname_pcmk_libs}-devel
%{_includedir}/pacemaker
%{_libdir}/*.so
%if %{with coverage}
%{_var}/lib/pacemaker/gcov
%endif
%{_libdir}/pkgconfig/*.pc
%license licenses/LGPLv2.1
%doc COPYING
@ -978,6 +944,7 @@ exit 0
%{_datadir}/pacemaker/*.rng
%{_datadir}/pacemaker/*.xsl
%{_datadir}/pacemaker/api
%{_datadir}/pacemaker/base
%{_datadir}/pkgconfig/pacemaker-schemas.pc
%files nagios-plugins-metadata
@ -986,13 +953,23 @@ exit 0
%license %{nagios_name}-%{nagios_hash}/COPYING
%changelog
* Fri Apr 22 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-4.2
- Fix issue with "stop_unexpected" value for "multiple-active" meta-attribute
- Resolves: rhbz2062848
* Fri Apr 8 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-4.1
- Support "stop_unexpected" value for "multiple-active" meta-attribute
- Resolves: rhbz2062848
* Wed May 18 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.3-1
- crm_resource --restart fails to restart clone instances except instance 0
- Add new multiple-active option for "stop unexpected instances"
- Unable to show metadata for "service" agents with "@" and "." in the name
- Resource ocf:pacemaker:attribute does not comply with the OCF 1.1 standard
- Allow resource meta-attribute to exempt resource from node health restrictions
- Show node health states in crm_mon
- Rebase pacemaker on upstream 2.1.3-rc2 release
- crm_mon API result does not validate against schema if fence event has exit-reason
- Resolves: rhbz1930578
- Resolves: rhbz2036815
- Resolves: rhbz2045096
- Resolves: rhbz2049722
- Resolves: rhbz2059638
- Resolves: rhbz2065812
- Resolves: rhbz2072107
- Resolves: rhbz2086230
* Wed Jan 26 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-4
- Fix regression in down event detection that affects remote nodes