import pacemaker-2.1.2-4.el9
This commit is contained in:
commit
7b51f9e49e
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
|
||||
SOURCES/pacemaker-ada5c3b36.tar.gz
|
2
.pacemaker.metadata
Normal file
2
.pacemaker.metadata
Normal file
@ -0,0 +1,2 @@
|
||||
2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
|
||||
1dec5b062ad8e9a89b4953e17a59e4597797a1e6 SOURCES/pacemaker-ada5c3b36.tar.gz
|
230
SOURCES/001-acl-group-schema.patch
Normal file
230
SOURCES/001-acl-group-schema.patch
Normal file
@ -0,0 +1,230 @@
|
||||
From f5ffbaf1f537d3d5b00e594211cd322f97df51ac Mon Sep 17 00:00:00 2001
|
||||
From: Grace Chin <gchin@redhat.com>
|
||||
Date: Fri, 5 Nov 2021 11:39:39 -0400
|
||||
Subject: [PATCH 1/3] Low: xml: clone acls schema in preparation for changes
|
||||
|
||||
---
|
||||
xml/acls-3.8.rng | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 80 insertions(+)
|
||||
create mode 100644 xml/acls-3.8.rng
|
||||
|
||||
diff --git a/xml/acls-3.8.rng b/xml/acls-3.8.rng
|
||||
new file mode 100644
|
||||
index 000000000..0fe6eed96
|
||||
--- /dev/null
|
||||
+++ b/xml/acls-3.8.rng
|
||||
@@ -0,0 +1,80 @@
|
||||
+<?xml version="1.0" encoding="UTF-8"?>
|
||||
+<grammar xmlns="http://relaxng.org/ns/structure/1.0"
|
||||
+ datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
||||
+ <start>
|
||||
+ <optional>
|
||||
+ <ref name="element-acls"/>
|
||||
+ </optional>
|
||||
+ </start>
|
||||
+
|
||||
+ <define name="element-acls">
|
||||
+ <element name="acls">
|
||||
+ <zeroOrMore>
|
||||
+ <choice>
|
||||
+ <element name="acl_target">
|
||||
+ <attribute name="id"><text/></attribute>
|
||||
+ <zeroOrMore>
|
||||
+ <element name="role">
|
||||
+ <attribute name="id"><data type="IDREF"/></attribute>
|
||||
+ </element>
|
||||
+ </zeroOrMore>
|
||||
+ </element>
|
||||
+ <element name="acl_group">
|
||||
+ <!-- Here 'id' is the name of a unix group -->
|
||||
+ <attribute name="id"><data type="ID"/></attribute>
|
||||
+ <zeroOrMore>
|
||||
+ <element name="role">
|
||||
+ <attribute name="id"><data type="IDREF"/></attribute>
|
||||
+ </element>
|
||||
+ </zeroOrMore>
|
||||
+ </element>
|
||||
+ <element name="acl_role">
|
||||
+ <attribute name="id"><data type="ID"/></attribute>
|
||||
+ <optional>
|
||||
+ <attribute name="description"><text/></attribute>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="element-permission"/>
|
||||
+ </zeroOrMore>
|
||||
+ </element>
|
||||
+ </choice>
|
||||
+ </zeroOrMore>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
+ <define name="element-permission">
|
||||
+ <element name="acl_permission">
|
||||
+ <attribute name="id"><data type="ID"/></attribute>
|
||||
+
|
||||
+ <attribute name="kind">
|
||||
+ <choice>
|
||||
+ <value>read</value>
|
||||
+ <value>write</value>
|
||||
+ <value>deny</value>
|
||||
+ </choice>
|
||||
+ </attribute>
|
||||
+
|
||||
+ <choice>
|
||||
+ <attribute name="xpath"><text/></attribute>
|
||||
+ <!-- reference is already sufficiently specific without 'object-type' -->
|
||||
+ <attribute name="reference"><data type="IDREF"/></attribute>
|
||||
+ <group>
|
||||
+ <!-- Use 'object-type' to avoid conflicting with the 'tag' configuration concept -->
|
||||
+ <attribute name="object-type"><text/></attribute>
|
||||
+ <optional>
|
||||
+ <!--
|
||||
+ does not make sense with anything other than object-type
|
||||
+ xpath and reference are already sufficiently specific
|
||||
+ -->
|
||||
+ <attribute name="attribute"><text/></attribute>
|
||||
+ </optional>
|
||||
+ </group>
|
||||
+ </choice>
|
||||
+
|
||||
+ <optional>
|
||||
+ <attribute name="description"><text/></attribute>
|
||||
+ </optional>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
+</grammar>
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 7838213fc639236bdedf5f15320152d973f1bdad Mon Sep 17 00:00:00 2001
|
||||
From: Grace Chin <gchin@redhat.com>
|
||||
Date: Fri, 5 Nov 2021 11:40:48 -0400
|
||||
Subject: [PATCH 2/3] Add a 'name' attribute to acl_target and acl_group
|
||||
elements
|
||||
|
||||
---
|
||||
xml/acls-3.8.rng | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/xml/acls-3.8.rng b/xml/acls-3.8.rng
|
||||
index 0fe6eed96..48bcdffe3 100644
|
||||
--- a/xml/acls-3.8.rng
|
||||
+++ b/xml/acls-3.8.rng
|
||||
@@ -13,6 +13,9 @@
|
||||
<choice>
|
||||
<element name="acl_target">
|
||||
<attribute name="id"><text/></attribute>
|
||||
+ <optional>
|
||||
+ <attribute name="name"><text/></attribute>
|
||||
+ </optional>
|
||||
<zeroOrMore>
|
||||
<element name="role">
|
||||
<attribute name="id"><data type="IDREF"/></attribute>
|
||||
@@ -22,6 +25,9 @@
|
||||
<element name="acl_group">
|
||||
<!-- Here 'id' is the name of a unix group -->
|
||||
<attribute name="id"><data type="ID"/></attribute>
|
||||
+ <optional>
|
||||
+ <attribute name="name"><text/></attribute>
|
||||
+ </optional>
|
||||
<zeroOrMore>
|
||||
<element name="role">
|
||||
<attribute name="id"><data type="IDREF"/></attribute>
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From c3c498f4636f57e29670f8e385b625024ed222d7 Mon Sep 17 00:00:00 2001
|
||||
From: Grace Chin <gchin@redhat.com>
|
||||
Date: Fri, 5 Nov 2021 11:42:48 -0400
|
||||
Subject: [PATCH 3/3] Changes made by run of 'cts/cts-cli -s'
|
||||
|
||||
---
|
||||
cts/cli/regression.upgrade.exp | 7 +++++--
|
||||
cts/cli/regression.validity.exp | 22 ++++++++++++++++++----
|
||||
2 files changed, 23 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/cts/cli/regression.upgrade.exp b/cts/cli/regression.upgrade.exp
|
||||
index e38adebdd..7ce7ec13b 100644
|
||||
--- a/cts/cli/regression.upgrade.exp
|
||||
+++ b/cts/cli/regression.upgrade.exp
|
||||
@@ -91,8 +91,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6
|
||||
update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7
|
||||
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
|
||||
update_validation debug: Configuration valid for schema: pacemaker-3.7
|
||||
-update_validation trace: Stopping at pacemaker-3.7
|
||||
-update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.7
|
||||
+update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8
|
||||
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
|
||||
+update_validation debug: Configuration valid for schema: pacemaker-3.8
|
||||
+update_validation trace: Stopping at pacemaker-3.8
|
||||
+update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.8
|
||||
=#=#=#= Current cib after: Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping) =#=#=#=
|
||||
<cib epoch="2" num_updates="0" admin_epoch="1">
|
||||
<configuration>
|
||||
diff --git a/cts/cli/regression.validity.exp b/cts/cli/regression.validity.exp
|
||||
index 5ace430e7..125035a47 100644
|
||||
--- a/cts/cli/regression.validity.exp
|
||||
+++ b/cts/cli/regression.validity.exp
|
||||
@@ -121,7 +121,11 @@ update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
|
||||
element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
update_validation trace: pacemaker-3.7 validation failed
|
||||
-Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.7
|
||||
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
|
||||
+element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
+element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
+update_validation trace: pacemaker-3.8 validation failed
|
||||
+Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.8
|
||||
=#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Invalid configuration (78) =#=#=#=
|
||||
* Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation)
|
||||
=#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#=
|
||||
@@ -226,7 +230,10 @@ update_validation trace: pacemaker-3.6 validation failed
|
||||
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
|
||||
element cib: Relax-NG validity error : Invalid attribute validate-with for element cib
|
||||
update_validation trace: pacemaker-3.7 validation failed
|
||||
-Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.7
|
||||
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
|
||||
+element cib: Relax-NG validity error : Invalid attribute validate-with for element cib
|
||||
+update_validation trace: pacemaker-3.8 validation failed
|
||||
+Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.8
|
||||
=#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Invalid configuration (78) =#=#=#=
|
||||
* Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with)
|
||||
=#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#=
|
||||
@@ -326,8 +333,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6
|
||||
update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7
|
||||
update_validation debug: Testing 'pacemaker-3.7' validation (21 of X)
|
||||
update_validation debug: Configuration valid for schema: pacemaker-3.7
|
||||
-update_validation trace: Stopping at pacemaker-3.7
|
||||
-update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.7
|
||||
+update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8
|
||||
+update_validation debug: Testing 'pacemaker-3.8' validation (22 of X)
|
||||
+update_validation debug: Configuration valid for schema: pacemaker-3.8
|
||||
+update_validation trace: Stopping at pacemaker-3.8
|
||||
+update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.8
|
||||
unpack_resources error: Resource start-up disabled since no STONITH resources have been defined
|
||||
unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option
|
||||
unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity
|
||||
@@ -437,6 +447,8 @@ element rsc_order: Relax-NG validity error : Invalid attribute first-action for
|
||||
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
+element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
+element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
=#=#=#= Current cib after: Make resulting CIB invalid, and without validate-with attribute =#=#=#=
|
||||
<cib epoch="41" num_updates="0" admin_epoch="0" validate-with="none">
|
||||
<configuration>
|
||||
@@ -502,6 +514,8 @@ validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attrib
|
||||
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
+validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order
|
||||
+validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order
|
||||
unpack_resources error: Resource start-up disabled since no STONITH resources have been defined
|
||||
unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option
|
||||
unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity
|
||||
--
|
||||
2.27.0
|
||||
|
2100
SOURCES/002-fencing-reasons.patch
Normal file
2100
SOURCES/002-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
2476
SOURCES/003-fencing-reasons.patch
Normal file
2476
SOURCES/003-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
73
SOURCES/004-systemd-metadata.patch
Normal file
73
SOURCES/004-systemd-metadata.patch
Normal file
@ -0,0 +1,73 @@
|
||||
From 09ef95a2eed48b4eb7488788a1b655d67eafe783 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Lumens <clumens@redhat.com>
|
||||
Date: Tue, 30 Nov 2021 14:47:12 -0500
|
||||
Subject: [PATCH] Low: libcrmservice: Handle systemd service templates.
|
||||
|
||||
These unit files (which have an @ sign at the end) expect to be
|
||||
parameterized by an instance name. Not providing an instance name
|
||||
causes the dbus lookup to fail, and we fall back to assume this is an
|
||||
LSB service. If the user doesn't provide an instance name, just add a
|
||||
fake one. It doesn't seem to matter what name is given for the lookup.
|
||||
|
||||
See: rhbz#2003151
|
||||
---
|
||||
lib/services/systemd.c | 22 ++++++++++++++++------
|
||||
1 file changed, 16 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
|
||||
index 8e9fff484..27a3b376d 100644
|
||||
--- a/lib/services/systemd.c
|
||||
+++ b/lib/services/systemd.c
|
||||
@@ -206,17 +206,27 @@ systemd_unit_extension(const char *name)
|
||||
}
|
||||
|
||||
static char *
|
||||
-systemd_service_name(const char *name)
|
||||
+systemd_service_name(const char *name, bool add_instance_name)
|
||||
{
|
||||
- if (name == NULL) {
|
||||
+ if (pcmk__str_empty(name)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (systemd_unit_extension(name)) {
|
||||
return strdup(name);
|
||||
- }
|
||||
|
||||
- return crm_strdup_printf("%s.service", name);
|
||||
+ /* Services that end with an @ sign are systemd templates. They expect an
|
||||
+ * instance name to follow the service name. If no instance name was
|
||||
+ * provided, just add "x" to the string as the instance name. It doesn't
|
||||
+ * seem to matter for purposes of looking up whether a service exists or
|
||||
+ * not.
|
||||
+ */
|
||||
+ } else if (add_instance_name && *(name+strlen(name)-1) == '@') {
|
||||
+ return crm_strdup_printf("%sx.service", name);
|
||||
+
|
||||
+ } else {
|
||||
+ return crm_strdup_printf("%s.service", name);
|
||||
+ }
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -427,7 +437,7 @@ invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path)
|
||||
CRM_ASSERT(msg != NULL);
|
||||
|
||||
// Add the (expanded) unit name as the argument
|
||||
- name = systemd_service_name(arg_name);
|
||||
+ name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
|
||||
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name,
|
||||
DBUS_TYPE_INVALID));
|
||||
free(name);
|
||||
@@ -944,7 +954,7 @@ invoke_unit_by_path(svc_action_t *op, const char *unit)
|
||||
/* (ss) */
|
||||
{
|
||||
const char *replace_s = "replace";
|
||||
- char *name = systemd_service_name(op->agent);
|
||||
+ char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none));
|
||||
|
||||
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
|
||||
CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID));
|
||||
--
|
||||
2.27.0
|
||||
|
2200
SOURCES/005-fencing-reasons.patch
Normal file
2200
SOURCES/005-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
143
SOURCES/006-stateful-metadata.patch
Normal file
143
SOURCES/006-stateful-metadata.patch
Normal file
@ -0,0 +1,143 @@
|
||||
From b52fe799c89637e2a761a5725c2376db5c05f2d1 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 30 Nov 2021 15:51:54 -0600
|
||||
Subject: [PATCH 1/2] Low: resources: remove DOCTYPE from OCF 1.1-compliant
|
||||
agents
|
||||
|
||||
OCF 1.1 replaced the DTD schema with RNG, but DOCTYPE still refers to the DTD.
|
||||
There's no DOCTYPE for RNG, and DOCTYPE is optional, so just remove it.
|
||||
---
|
||||
extra/resources/Dummy | 3 +--
|
||||
extra/resources/HealthIOWait | 3 +--
|
||||
extra/resources/Stateful | 3 +--
|
||||
extra/resources/attribute | 3 +--
|
||||
extra/resources/ping | 3 +--
|
||||
extra/resources/remote | 3 +--
|
||||
6 files changed, 6 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/extra/resources/Dummy b/extra/resources/Dummy
|
||||
index a344deac0..56584e564 100755
|
||||
--- a/extra/resources/Dummy
|
||||
+++ b/extra/resources/Dummy
|
||||
@@ -58,8 +58,7 @@
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="Dummy" version="2.0">
|
||||
+<resource-agent name="Dummy" version="2.1">
|
||||
<version>1.1</version>
|
||||
|
||||
<longdesc lang="en">
|
||||
diff --git a/extra/resources/HealthIOWait b/extra/resources/HealthIOWait
|
||||
index 43a8b70c4..5f1483ef7 100755
|
||||
--- a/extra/resources/HealthIOWait
|
||||
+++ b/extra/resources/HealthIOWait
|
||||
@@ -25,8 +25,7 @@
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="HealthIOWait" version="1.1">
|
||||
+<resource-agent name="HealthIOWait" version="1.2">
|
||||
<version>1.1</version>
|
||||
|
||||
<longdesc lang="en">
|
||||
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
|
||||
index ae3424bbf..0d2062d51 100755
|
||||
--- a/extra/resources/Stateful
|
||||
+++ b/extra/resources/Stateful
|
||||
@@ -39,8 +39,7 @@ SCORE_PROMOTED=10
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="Stateful" version="1.1">
|
||||
+<resource-agent name="Stateful" version="1.2">
|
||||
<version>1.1</version>
|
||||
|
||||
<longdesc lang="en">
|
||||
diff --git a/extra/resources/attribute b/extra/resources/attribute
|
||||
index 1800dff8f..a2bd353e0 100755
|
||||
--- a/extra/resources/attribute
|
||||
+++ b/extra/resources/attribute
|
||||
@@ -57,8 +57,7 @@ END
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="attribute" version="1.1">
|
||||
+<resource-agent name="attribute" version="1.2">
|
||||
<version>1.1</version>
|
||||
<shortdesc lang="en">Manages a node attribute</shortdesc>
|
||||
<longdesc lang="en">
|
||||
diff --git a/extra/resources/ping b/extra/resources/ping
|
||||
index 6e296979f..7cc6b802d 100755
|
||||
--- a/extra/resources/ping
|
||||
+++ b/extra/resources/ping
|
||||
@@ -36,8 +36,7 @@
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="ping" version="1.1">
|
||||
+<resource-agent name="ping" version="1.2">
|
||||
<version>1.1</version>
|
||||
|
||||
<longdesc lang="en">
|
||||
diff --git a/extra/resources/remote b/extra/resources/remote
|
||||
index a53262bb6..f7e40dc81 100755
|
||||
--- a/extra/resources/remote
|
||||
+++ b/extra/resources/remote
|
||||
@@ -24,8 +24,7 @@
|
||||
meta_data() {
|
||||
cat <<END
|
||||
<?xml version="1.0"?>
|
||||
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||
-<resource-agent name="remote" version="1.0">
|
||||
+<resource-agent name="remote" version="1.1">
|
||||
<version>1.1</version>
|
||||
<shortdesc lang="en">Pacemaker Remote connection</shortdesc>
|
||||
<parameters>
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 70f469120f8db6a024c786466ee74a6c7fbd1f43 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 30 Nov 2021 15:53:39 -0600
|
||||
Subject: [PATCH 2/2] Fix: resources: use correct syntax in Stateful meta-data
|
||||
|
||||
The OCF standard only allows "0" or "1" for booleans.
|
||||
|
||||
This fixes incorrect ocf:pacemaker:Stateful meta-data syntax introduced by
|
||||
7024398 as a regression in the 2.1.0 release.
|
||||
---
|
||||
extra/resources/Stateful | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/extra/resources/Stateful b/extra/resources/Stateful
|
||||
index 0d2062d51..2ebe6725f 100755
|
||||
--- a/extra/resources/Stateful
|
||||
+++ b/extra/resources/Stateful
|
||||
@@ -57,7 +57,7 @@ Location to store the resource state in
|
||||
<content type="string" default="${HA_VARRUN%%/}/Stateful-${OCF_RESOURCE_INSTANCE}.state" />
|
||||
</parameter>
|
||||
|
||||
-<parameter name="envfile" reloadable="true">
|
||||
+<parameter name="envfile" reloadable="1">
|
||||
<longdesc lang="en">
|
||||
If this is set, the environment will be dumped to this file for every call.
|
||||
</longdesc>
|
||||
@@ -65,7 +65,7 @@ If this is set, the environment will be dumped to this file for every call.
|
||||
<content type="string" default="" />
|
||||
</parameter>
|
||||
|
||||
-<parameter name="notify_delay" reloadable="true">
|
||||
+<parameter name="notify_delay" reloadable="1">
|
||||
<longdesc lang="en">
|
||||
The notify action will sleep for this many seconds before returning,
|
||||
to simulate a long-running notify.
|
||||
--
|
||||
2.27.0
|
||||
|
39
SOURCES/007-memory-leak.patch
Normal file
39
SOURCES/007-memory-leak.patch
Normal file
@ -0,0 +1,39 @@
|
||||
From f491d9d5a7ed554fed985de356bb085fdec3421c Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 7 Dec 2021 09:01:00 -0600
|
||||
Subject: [PATCH] Fix: fencer: avoid memory leak when broadcasting history
|
||||
differences
|
||||
|
||||
Regression introduced in 2.1.0 by dbc27b2
|
||||
---
|
||||
daemons/fenced/fenced_history.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
|
||||
index bc159383c..a9c57dc86 100644
|
||||
--- a/daemons/fenced/fenced_history.c
|
||||
+++ b/daemons/fenced/fenced_history.c
|
||||
@@ -484,8 +484,6 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
|
||||
!pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) {
|
||||
xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST,
|
||||
msg, LOG_NEVER);
|
||||
- GHashTable *received_history =
|
||||
- history?stonith_xml_history_to_list(history):NULL;
|
||||
|
||||
/* either a broadcast created directly upon stonith-API request
|
||||
* or a diff as response to such a thing
|
||||
@@ -497,6 +495,11 @@ stonith_fence_history(xmlNode *msg, xmlNode **output,
|
||||
if (!history ||
|
||||
!crm_is_true(crm_element_value(history,
|
||||
F_STONITH_DIFFERENTIAL))) {
|
||||
+ GHashTable *received_history = NULL;
|
||||
+
|
||||
+ if (history != NULL) {
|
||||
+ received_history = stonith_xml_history_to_list(history);
|
||||
+ }
|
||||
out_history =
|
||||
stonith_local_history_diff_and_merge(received_history, TRUE, NULL);
|
||||
if (out_history) {
|
||||
--
|
||||
2.27.0
|
||||
|
43
SOURCES/008-fencing-history.patch
Normal file
43
SOURCES/008-fencing-history.patch
Normal file
@ -0,0 +1,43 @@
|
||||
From 0339e89f3238b31df78b864dae8684b82c370741 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 13 Dec 2021 15:22:40 -0600
|
||||
Subject: [PATCH] Fix: fencer: get current time correctly
|
||||
|
||||
f52bc8e1ce (2.1.2) introduced a regression by using clock_gettime() with
|
||||
CLOCK_MONOTONIC to get the current time. Use qb_util_timespec_from_epoch_get()
|
||||
instead (which as of this writing uses clock_gettime() with CLOCK_REALTIME if
|
||||
available, and falls back to gettimeofday() if not).
|
||||
---
|
||||
daemons/fenced/fenced_commands.c | 11 +++--------
|
||||
1 file changed, 3 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
|
||||
index f34cb4f13..7685cb8c3 100644
|
||||
--- a/daemons/fenced/fenced_commands.c
|
||||
+++ b/daemons/fenced/fenced_commands.c
|
||||
@@ -2746,19 +2746,14 @@ bool fencing_peer_active(crm_node_t *peer)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
-void set_fencing_completed(remote_fencing_op_t * op)
|
||||
+void
|
||||
+set_fencing_completed(remote_fencing_op_t *op)
|
||||
{
|
||||
-#ifdef CLOCK_MONOTONIC
|
||||
struct timespec tv;
|
||||
|
||||
- clock_gettime(CLOCK_MONOTONIC, &tv);
|
||||
-
|
||||
+ qb_util_timespec_from_epoch_get(&tv);
|
||||
op->completed = tv.tv_sec;
|
||||
op->completed_nsec = tv.tv_nsec;
|
||||
-#else
|
||||
- op->completed = time(NULL);
|
||||
- op->completed_nsec = 0L;
|
||||
-#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
--
|
||||
2.27.0
|
||||
|
2985
SOURCES/009-fencing-reasons.patch
Normal file
2985
SOURCES/009-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
4157
SOURCES/010-probe-failures.patch
Normal file
4157
SOURCES/010-probe-failures.patch
Normal file
File diff suppressed because it is too large
Load Diff
1450
SOURCES/011-fencing-reasons.patch
Normal file
1450
SOURCES/011-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
65
SOURCES/012-notify-crash.patch
Normal file
65
SOURCES/012-notify-crash.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From ed8b2c86ab77aaa3d7fd688c049ad5e1b922a9c6 Mon Sep 17 00:00:00 2001
|
||||
From: Reid Wahl <nrwahl@protonmail.com>
|
||||
Date: Thu, 13 Jan 2022 02:56:55 -0800
|
||||
Subject: [PATCH] Fix: liblrmd: Avoid double-free during notify operation
|
||||
|
||||
This commit fixes a regression introduced by 31c7fa8a, causing a
|
||||
double-free in notify operations. lrmd_dispatch_internal() assigns the
|
||||
exit_reason string directly from an XML node to a new lrmd_event_data_t
|
||||
object (without duplicating), and this string gets freed twice.
|
||||
|
||||
Free #1: pcmk__create_history_xml() (reached via callback) calls
|
||||
lrmd__set_result(), which frees event.exit_reason and sets it to NULL.
|
||||
Free #2: lrmd_ipc_dispatch() frees the XML node, which contains a
|
||||
pointer to the exit_reason string just freed, after
|
||||
lrmd_dispatch_internal() returns.
|
||||
|
||||
Prior to 31c7fa8a, pcmk__create_history_xml reset event.rc and
|
||||
event.op_status but **not** event.exit_reason.
|
||||
|
||||
In this commit we simply make a copy of event.exit_reason in
|
||||
lrmd_dispatch_internal() before the callback. This way we don't have to
|
||||
worry about whatever happens in the callback, and we can continue to
|
||||
unset the exit_reason alongside the rc and op_status. The added overhead
|
||||
should be minimal.
|
||||
|
||||
This commit also makes a copy of output. That's not strictly necessary
|
||||
but adds some futureproofing and allows us to call lrmd__reset_result()
|
||||
at the end of lrmd_dispatch_internal().
|
||||
|
||||
Resolves: RHBZ#2039675
|
||||
|
||||
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
|
||||
---
|
||||
lib/lrmd/lrmd_client.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
||||
index ee31bb5ae9..5131a648b7 100644
|
||||
--- a/lib/lrmd/lrmd_client.c
|
||||
+++ b/lib/lrmd/lrmd_client.c
|
||||
@@ -305,9 +305,10 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
|
||||
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
|
||||
event.type = lrmd_event_exec_complete;
|
||||
|
||||
- // No need to duplicate the memory, so don't use setter functions
|
||||
- event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
|
||||
- event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
|
||||
+ /* output and exit_reason may be freed by a callback */
|
||||
+ event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT);
|
||||
+ lrmd__set_result(&event, event.rc, event.op_status,
|
||||
+ crm_element_value(msg, F_LRMD_RSC_EXIT_REASON));
|
||||
|
||||
event.params = xml2list(msg);
|
||||
} else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) {
|
||||
@@ -324,6 +325,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
|
||||
if (event.params) {
|
||||
g_hash_table_destroy(event.params);
|
||||
}
|
||||
+ lrmd__reset_result(&event);
|
||||
}
|
||||
|
||||
// \return Always 0, to indicate that IPC mainloop source should be kept
|
||||
--
|
||||
2.27.0
|
||||
|
26
SOURCES/013-probe-failures.patch
Normal file
26
SOURCES/013-probe-failures.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From 186d5a02fba919c455fd6eeb050b4be107f82159 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Lumens <clumens@redhat.com>
|
||||
Date: Thu, 13 Jan 2022 17:02:47 -0500
|
||||
Subject: [PATCH] Low: scheduler: Use the old RC code to log maskable probe
|
||||
failures.
|
||||
|
||||
---
|
||||
lib/pengine/unpack.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
|
||||
index 8a2d2a6d6d..b01f86257a 100644
|
||||
--- a/lib/pengine/unpack.c
|
||||
+++ b/lib/pengine/unpack.c
|
||||
@@ -3780,7 +3780,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
|
||||
|
||||
if (maskable_probe_failure) {
|
||||
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
|
||||
- services_ocf_exitcode_str(rc), rsc->id, node->details->uname);
|
||||
+ services_ocf_exitcode_str(old_rc), rsc->id, node->details->uname);
|
||||
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
|
||||
on_fail, data_set);
|
||||
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
|
||||
--
|
||||
2.27.0
|
||||
|
43
SOURCES/014-pcmk_delay_base.patch
Normal file
43
SOURCES/014-pcmk_delay_base.patch
Normal file
@ -0,0 +1,43 @@
|
||||
From 9d812b0401d4cedef53a3cc3653ec782a5c49e37 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Thu, 13 Jan 2022 10:42:02 -0600
|
||||
Subject: [PATCH] Doc: fencer: improve pcmk_delay_base meta-data
|
||||
|
||||
Update its type, since its value can now be a node map as well as a string,
|
||||
and add more detail to its description.
|
||||
---
|
||||
daemons/fenced/pacemaker-fenced.c | 18 +++++++++++-------
|
||||
1 file changed, 11 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
|
||||
index 1b954be5a4..12f331496c 100644
|
||||
--- a/daemons/fenced/pacemaker-fenced.c
|
||||
+++ b/daemons/fenced/pacemaker-fenced.c
|
||||
@@ -1548,13 +1548,17 @@ main(int argc, char **argv)
|
||||
PCMK_STONITH_DELAY_BASE);
|
||||
printf(" <shortdesc lang=\"en\">Enable a base delay for "
|
||||
"fencing actions and specify base delay value.</shortdesc>\n");
|
||||
- printf(" <longdesc lang=\"en\">This prevents double fencing when "
|
||||
- "different delays are configured on the nodes.\nUse this to "
|
||||
- "enable a static delay for fencing actions.\nThe overall delay "
|
||||
- "is derived from a random delay value adding this static delay "
|
||||
- "so that the sum is kept below the maximum delay.\nSet to eg. "
|
||||
- "node1:1s;node2:5 to set different value per node.</longdesc>\n");
|
||||
- printf(" <content type=\"time\" default=\"0s\"/>\n");
|
||||
+ printf(" <longdesc lang=\"en\">This enables a static delay for "
|
||||
+ "fencing actions, which can help avoid \"death matches\" where "
|
||||
+ "two nodes try to fence each other at the same time. If "
|
||||
+ PCMK_STONITH_DELAY_MAX " is also used, a random delay will be "
|
||||
+ "added such that the total delay is kept below that value.\n"
|
||||
+ "This can be set to a single time value to apply to any node "
|
||||
+ "targeted by this device (useful if a separate device is "
|
||||
+ "configured for each target), or to a node map (for example, "
|
||||
+ "\"node1:1s;node2:5\") to set a different value per target.\n"
|
||||
+ " </longdesc>\n");
|
||||
+ printf(" <content type=\"string\" default=\"0s\"/>\n");
|
||||
printf(" </parameter>\n");
|
||||
|
||||
printf(" <parameter name=\"%s\" unique=\"0\">\n",
|
||||
--
|
||||
2.27.0
|
||||
|
1093
SOURCES/015-fencing-reasons.patch
Normal file
1093
SOURCES/015-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
56
SOURCES/016-fencing-crash.patch
Normal file
56
SOURCES/016-fencing-crash.patch
Normal file
@ -0,0 +1,56 @@
|
||||
From e330568504ec379ea42460d21a2e20b1652d9445 Mon Sep 17 00:00:00 2001
|
||||
From: Reid Wahl <nrwahl@protonmail.com>
|
||||
Date: Fri, 14 Jan 2022 01:35:35 -0800
|
||||
Subject: [PATCH] Fix: fencing: Don't set stonith action to pending if fork
|
||||
fails
|
||||
|
||||
Currently, we set a stonith action to pending if
|
||||
services_action_async_fork_notify() returns true. However, "true" means
|
||||
that the svc_action should not be freed. This might be because the
|
||||
svc_action forked successfully and is pending, or it might be because
|
||||
the svc_action has already been freed.
|
||||
|
||||
In the case of stonith actions, if we fail to fork, the stonith_action_t
|
||||
object stored in svc_action->cb_data gets freed by the done callback,
|
||||
and services_action_async_fork_notify() returns true. If we try to set
|
||||
the action to pending, it causes a segfault.
|
||||
|
||||
This commit moves the "set to pending" step to the
|
||||
stonith_action_async_forked() callback. We avoid the segfault and only
|
||||
set it to pending if it's actually pending.
|
||||
|
||||
A slight difference in ordering was required to achieve this. Now, the
|
||||
action gets set to pending immediately before being added to the
|
||||
mainloop, instead of immediately after.
|
||||
|
||||
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
|
||||
---
|
||||
lib/fencing/st_actions.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
|
||||
index e4e43225cd..306001af69 100644
|
||||
--- a/lib/fencing/st_actions.c
|
||||
+++ b/lib/fencing/st_actions.c
|
||||
@@ -550,6 +550,9 @@ stonith_action_async_forked(svc_action_t *svc_action)
|
||||
(action->fork_cb) (svc_action->pid, action->userdata);
|
||||
}
|
||||
|
||||
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING,
|
||||
+ NULL);
|
||||
+
|
||||
crm_trace("Child process %d performing action '%s' successfully forked",
|
||||
action->pid, action->action);
|
||||
}
|
||||
@@ -619,8 +622,6 @@ internal_stonith_action_execute(stonith_action_t * action)
|
||||
if (services_action_async_fork_notify(svc_action,
|
||||
&stonith_action_async_done,
|
||||
&stonith_action_async_forked)) {
|
||||
- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
|
||||
- PCMK_EXEC_PENDING, NULL);
|
||||
return pcmk_ok;
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
875
SOURCES/017-fencing-reasons.patch
Normal file
875
SOURCES/017-fencing-reasons.patch
Normal file
@ -0,0 +1,875 @@
|
||||
From 523f62eb235836a01ea039c23ada261a494f7b32 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Wed, 10 Nov 2021 15:22:47 -0600
|
||||
Subject: [PATCH 01/11] Feature: libpacemaker: improve result for high-level
|
||||
fencing API
|
||||
|
||||
Previously, pcmk__fence_action()'s helpers for asynchronous fencing actions
|
||||
initialized the result to a generic error, and then overrode that only on
|
||||
success.
|
||||
|
||||
Now, set a detailed result for early failures, and use the full result when
|
||||
available from the fencing API.
|
||||
|
||||
A standard return code is still returned to callers at this point.
|
||||
---
|
||||
lib/pacemaker/pcmk_fence.c | 31 ++++++++++++++++++-------------
|
||||
1 file changed, 18 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index 7d6acd0de6..125e1b268b 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -32,8 +32,8 @@ static struct {
|
||||
unsigned int timeout;
|
||||
unsigned int tolerance;
|
||||
int delay;
|
||||
- int rc;
|
||||
-} async_fence_data;
|
||||
+ pcmk__action_result_t result;
|
||||
+} async_fence_data = { NULL, };
|
||||
|
||||
static int
|
||||
handle_level(stonith_t *st, char *target, int fence_level,
|
||||
@@ -76,14 +76,13 @@ handle_level(stonith_t *st, char *target, int fence_level,
|
||||
static void
|
||||
notify_callback(stonith_t * st, stonith_event_t * e)
|
||||
{
|
||||
- if (e->result != pcmk_ok) {
|
||||
- return;
|
||||
- }
|
||||
+ if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
|
||||
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||
|
||||
- if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) &&
|
||||
- pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||
-
|
||||
- async_fence_data.rc = e->result;
|
||||
+ pcmk__set_result(&async_fence_data.result,
|
||||
+ stonith__event_exit_status(e),
|
||||
+ stonith__event_execution_status(e),
|
||||
+ stonith__event_exit_reason(e));
|
||||
g_main_loop_quit(mainloop);
|
||||
}
|
||||
}
|
||||
@@ -91,8 +90,9 @@ notify_callback(stonith_t * st, stonith_event_t * e)
|
||||
static void
|
||||
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
||||
{
|
||||
- async_fence_data.rc = data->rc;
|
||||
-
|
||||
+ pcmk__set_result(&async_fence_data.result, stonith__exit_status(data),
|
||||
+ stonith__execution_status(data),
|
||||
+ stonith__exit_reason(data));
|
||||
g_main_loop_quit(mainloop);
|
||||
}
|
||||
|
||||
@@ -106,6 +106,8 @@ async_fence_helper(gpointer user_data)
|
||||
if (rc != pcmk_ok) {
|
||||
fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
|
||||
g_main_loop_quit(mainloop);
|
||||
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||
+ PCMK_EXEC_NOT_CONNECTED, NULL);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@@ -121,6 +123,8 @@ async_fence_helper(gpointer user_data)
|
||||
|
||||
if (call_id < 0) {
|
||||
g_main_loop_quit(mainloop);
|
||||
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||
+ PCMK_EXEC_ERROR, pcmk_strerror(call_id));
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@@ -146,7 +150,8 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
async_fence_data.timeout = timeout;
|
||||
async_fence_data.tolerance = tolerance;
|
||||
async_fence_data.delay = delay;
|
||||
- async_fence_data.rc = pcmk_err_generic;
|
||||
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
|
||||
+ NULL);
|
||||
|
||||
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
|
||||
mainloop_set_trigger(trig);
|
||||
@@ -156,7 +161,7 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
|
||||
free(async_fence_data.name);
|
||||
|
||||
- return pcmk_legacy2rc(async_fence_data.rc);
|
||||
+ return stonith__result2rc(&async_fence_data.result);
|
||||
}
|
||||
|
||||
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 008868fae5d1b0d6d8dc61f7acfb3856801ddd52 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 15:36:10 -0600
|
||||
Subject: [PATCH 02/11] Refactor: libpacemaker: add exit reason to high-level
|
||||
fencing API
|
||||
|
||||
Nothing uses it as of this commit
|
||||
---
|
||||
include/pacemaker.h | 5 ++++-
|
||||
include/pcmki/pcmki_fence.h | 5 ++++-
|
||||
lib/pacemaker/pcmk_fence.c | 10 +++++++---
|
||||
tools/stonith_admin.c | 6 +++---
|
||||
4 files changed, 18 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||
index a8523c969e..0daa4c5945 100644
|
||||
--- a/include/pacemaker.h
|
||||
+++ b/include/pacemaker.h
|
||||
@@ -189,12 +189,15 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||
* again.
|
||||
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||
* static/random fencing delays from pcmk_delay_base/max.
|
||||
+ * \param[out] reason If not NULL, where to put descriptive failure reason
|
||||
*
|
||||
* \return Standard Pacemaker return code
|
||||
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||
+ * returned value.
|
||||
*/
|
||||
int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay);
|
||||
+ int delay, char **reason);
|
||||
|
||||
/*!
|
||||
* \brief List the fencing operations that have occurred for a specific node.
|
||||
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||
index d4cef68f5c..c3da0361d7 100644
|
||||
--- a/include/pcmki/pcmki_fence.h
|
||||
+++ b/include/pcmki/pcmki_fence.h
|
||||
@@ -28,12 +28,15 @@
|
||||
* again.
|
||||
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||
* static/random fencing delays from pcmk_delay_base/max
|
||||
+ * \param[out] reason If not NULL, where to put descriptive failure reason
|
||||
*
|
||||
* \return Standard Pacemaker return code
|
||||
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||
+ * returned value.
|
||||
*/
|
||||
int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay);
|
||||
+ int delay, char **reason);
|
||||
|
||||
/*!
|
||||
* \brief List the fencing operations that have occurred for a specific node.
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index 125e1b268b..dbf084fb6b 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -139,7 +139,7 @@ async_fence_helper(gpointer user_data)
|
||||
int
|
||||
pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay)
|
||||
+ int delay, char **reason)
|
||||
{
|
||||
crm_trigger_t *trig;
|
||||
|
||||
@@ -161,6 +161,9 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
|
||||
free(async_fence_data.name);
|
||||
|
||||
+ if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
|
||||
+ *reason = strdup(async_fence_data.result.exit_reason);
|
||||
+ }
|
||||
return stonith__result2rc(&async_fence_data.result);
|
||||
}
|
||||
|
||||
@@ -168,9 +171,10 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
int
|
||||
pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay)
|
||||
+ int delay, char **reason)
|
||||
{
|
||||
- return pcmk__fence_action(st, target, action, name, timeout, tolerance, delay);
|
||||
+ return pcmk__fence_action(st, target, action, name, timeout, tolerance,
|
||||
+ delay, reason);
|
||||
}
|
||||
#endif
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index 2d48326e1b..fdc7c46d49 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -571,17 +571,17 @@ main(int argc, char **argv)
|
||||
|
||||
case 'B':
|
||||
rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay);
|
||||
+ options.tolerance*1000, options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay);
|
||||
+ options.tolerance*1000, options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay);
|
||||
+ options.tolerance*1000, options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 7570510f9985ba75ef73fb824f28109e135ace0a Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 15:40:48 -0600
|
||||
Subject: [PATCH 03/11] Refactor: libpacemaker: rename high-level fencing API
|
||||
|
||||
Rename pcmk_fence_action() to pcmk_request_fencing(), and its internal
|
||||
equivalent pcmk__fence_action() to pcmk__request_fencing(). The change is
|
||||
backward-compatible because pcmk_fence_action() has not been exposed publicly
|
||||
yet.
|
||||
|
||||
"Fence action" can be easily confused with libcrmservice actions, liblrmd
|
||||
actions, libstonithd actions, scheduler actions, and so forth.
|
||||
|
||||
Also, the new name makes it clearer that the caller is requesting that the
|
||||
cluster perform fencing, and not directly performing fencing.
|
||||
---
|
||||
include/pacemaker.h | 20 ++++++++++----------
|
||||
include/pcmki/pcmki_fence.h | 16 ++++++++--------
|
||||
lib/pacemaker/pcmk_fence.c | 16 ++++++++--------
|
||||
tools/stonith_admin.c | 18 ++++++++++++------
|
||||
4 files changed, 38 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||
index 0daa4c5945..e581f975a9 100644
|
||||
--- a/include/pacemaker.h
|
||||
+++ b/include/pacemaker.h
|
||||
@@ -177,27 +177,27 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||
|
||||
/*!
|
||||
- * \brief Perform a STONITH action.
|
||||
+ * \brief Ask the cluster to perform fencing
|
||||
*
|
||||
- * \param[in] st A connection to the STONITH API.
|
||||
- * \param[in] target The node receiving the action.
|
||||
- * \param[in] action The action to perform.
|
||||
+ * \param[in] st A connection to the fencer API
|
||||
+ * \param[in] target The node that should be fenced
|
||||
+ * \param[in] action The fencing action (on, off, reboot) to perform
|
||||
* \param[in] name Who requested the fence action?
|
||||
- * \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
|
||||
* \param[in] tolerance If a successful action for \p target happened within
|
||||
* this many ms, return 0 without performing the action
|
||||
- * again.
|
||||
+ * again
|
||||
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||
- * static/random fencing delays from pcmk_delay_base/max.
|
||||
+ * static/random fencing delays from pcmk_delay_base/max
|
||||
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||
*
|
||||
* \return Standard Pacemaker return code
|
||||
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||
* returned value.
|
||||
*/
|
||||
-int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay, char **reason);
|
||||
+int pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
+ const char *name, unsigned int timeout,
|
||||
+ unsigned int tolerance, int delay, char **reason);
|
||||
|
||||
/*!
|
||||
* \brief List the fencing operations that have occurred for a specific node.
|
||||
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||
index c3da0361d7..e3a7e27264 100644
|
||||
--- a/include/pcmki/pcmki_fence.h
|
||||
+++ b/include/pcmki/pcmki_fence.h
|
||||
@@ -13,14 +13,14 @@
|
||||
# include <crm/common/output_internal.h>
|
||||
|
||||
/*!
|
||||
- * \brief Perform a STONITH action.
|
||||
+ * \brief Ask the cluster to perform fencing
|
||||
*
|
||||
- * \note This is the internal version of pcmk_fence_action(). External users
|
||||
+ * \note This is the internal version of pcmk_request_fencing(). External users
|
||||
* of the pacemaker API should use that function instead.
|
||||
*
|
||||
- * \param[in] st A connection to the STONITH API.
|
||||
- * \param[in] target The node receiving the action.
|
||||
- * \param[in] action The action to perform.
|
||||
+ * \param[in] st A connection to the fencer API
|
||||
+ * \param[in] target The node that should be fenced
|
||||
+ * \param[in] action The fencing action (on, off, reboot) to perform
|
||||
* \param[in] name Who requested the fence action?
|
||||
* \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||
* \param[in] tolerance If a successful action for \p target happened within
|
||||
@@ -34,9 +34,9 @@
|
||||
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||
* returned value.
|
||||
*/
|
||||
-int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay, char **reason);
|
||||
+int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
+ const char *name, unsigned int timeout,
|
||||
+ unsigned int tolerance, int delay, char **reason);
|
||||
|
||||
/*!
|
||||
* \brief List the fencing operations that have occurred for a specific node.
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index dbf084fb6b..1b7feb54b2 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -137,9 +137,9 @@ async_fence_helper(gpointer user_data)
|
||||
}
|
||||
|
||||
int
|
||||
-pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay, char **reason)
|
||||
+pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
+ const char *name, unsigned int timeout,
|
||||
+ unsigned int tolerance, int delay, char **reason)
|
||||
{
|
||||
crm_trigger_t *trig;
|
||||
|
||||
@@ -169,12 +169,12 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||
|
||||
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||
int
|
||||
-pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||
- int delay, char **reason)
|
||||
+pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
+ const char *name, unsigned int timeout,
|
||||
+ unsigned int tolerance, int delay, char **reason)
|
||||
{
|
||||
- return pcmk__fence_action(st, target, action, name, timeout, tolerance,
|
||||
- delay, reason);
|
||||
+ return pcmk__request_fencing(st, target, action, name, timeout, tolerance,
|
||||
+ delay, reason);
|
||||
}
|
||||
#endif
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index fdc7c46d49..56948b3875 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -570,18 +570,24 @@ main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
case 'B':
|
||||
- rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay, NULL);
|
||||
+ rc = pcmk__request_fencing(st, target, "reboot", name,
|
||||
+ options.timeout * 1000,
|
||||
+ options.tolerance * 1000,
|
||||
+ options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
- rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay, NULL);
|
||||
+ rc = pcmk__request_fencing(st, target, "off", name,
|
||||
+ options.timeout * 1000,
|
||||
+ options.tolerance * 1000,
|
||||
+ options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
- rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
|
||||
- options.tolerance*1000, options.delay, NULL);
|
||||
+ rc = pcmk__request_fencing(st, target, "on", name,
|
||||
+ options.timeout * 1000,
|
||||
+ options.tolerance * 1000,
|
||||
+ options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 247eb303df934944c0b72b162bb661cee6e0ed8b Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 15:52:37 -0600
|
||||
Subject: [PATCH 04/11] Refactor: tools: drop unnecessary string duplication in
|
||||
stonith_admin
|
||||
|
||||
---
|
||||
tools/stonith_admin.c | 11 ++++-------
|
||||
1 file changed, 4 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index 56948b3875..c11e302e76 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -360,8 +360,6 @@ main(int argc, char **argv)
|
||||
|
||||
pcmk__cli_init_logging("stonith_admin", args->verbosity);
|
||||
|
||||
- name = strdup(crm_system_name);
|
||||
-
|
||||
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
|
||||
if (rc != pcmk_rc_ok) {
|
||||
exit_code = CRM_EX_ERROR;
|
||||
@@ -496,7 +494,7 @@ main(int argc, char **argv)
|
||||
if (st == NULL) {
|
||||
rc = -ENOMEM;
|
||||
} else if (!no_connect) {
|
||||
- rc = st->cmds->connect(st, name, NULL);
|
||||
+ rc = st->cmds->connect(st, crm_system_name, NULL);
|
||||
}
|
||||
if (rc < 0) {
|
||||
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
|
||||
@@ -570,21 +568,21 @@ main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
case 'B':
|
||||
- rc = pcmk__request_fencing(st, target, "reboot", name,
|
||||
+ rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
|
||||
options.timeout * 1000,
|
||||
options.tolerance * 1000,
|
||||
options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
- rc = pcmk__request_fencing(st, target, "off", name,
|
||||
+ rc = pcmk__request_fencing(st, target, "off", crm_system_name,
|
||||
options.timeout * 1000,
|
||||
options.tolerance * 1000,
|
||||
options.delay, NULL);
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
- rc = pcmk__request_fencing(st, target, "on", name,
|
||||
+ rc = pcmk__request_fencing(st, target, "on", crm_system_name,
|
||||
options.timeout * 1000,
|
||||
options.tolerance * 1000,
|
||||
options.delay, NULL);
|
||||
@@ -619,7 +617,6 @@ main(int argc, char **argv)
|
||||
out->finish(out, exit_code, true, NULL);
|
||||
pcmk__output_free(out);
|
||||
}
|
||||
- free(name);
|
||||
stonith_key_value_freeall(options.params, 1, 1);
|
||||
|
||||
if (st != NULL) {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From a7888bf6868d8d9d9c77f65ae9983cf748bb0548 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 15:56:34 -0600
|
||||
Subject: [PATCH 05/11] Refactor: tools: functionize requesting fencing in
|
||||
stonith_admin
|
||||
|
||||
... to reduce code duplication and improve readability
|
||||
---
|
||||
tools/stonith_admin.c | 27 +++++++++++++++------------
|
||||
1 file changed, 15 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index c11e302e76..f738a9c888 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -331,6 +331,18 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
|
||||
return context;
|
||||
}
|
||||
|
||||
+// \return Standard Pacemaker return code
|
||||
+static int
|
||||
+request_fencing(stonith_t *st, const char *target, const char *command)
|
||||
+{
|
||||
+ int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||
+ options.timeout * 1000,
|
||||
+ options.tolerance * 1000,
|
||||
+ options.delay, NULL);
|
||||
+
|
||||
+ return rc;
|
||||
+}
|
||||
+
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@@ -568,24 +580,15 @@ main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
case 'B':
|
||||
- rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
|
||||
- options.timeout * 1000,
|
||||
- options.tolerance * 1000,
|
||||
- options.delay, NULL);
|
||||
+ rc = request_fencing(st, target, "reboot");
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
- rc = pcmk__request_fencing(st, target, "off", crm_system_name,
|
||||
- options.timeout * 1000,
|
||||
- options.tolerance * 1000,
|
||||
- options.delay, NULL);
|
||||
+ rc = request_fencing(st, target, "off");
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
- rc = pcmk__request_fencing(st, target, "on", crm_system_name,
|
||||
- options.timeout * 1000,
|
||||
- options.tolerance * 1000,
|
||||
- options.delay, NULL);
|
||||
+ rc = request_fencing(st, target, "on");
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 2da32df780983ec1197e857eed5eeb5bf1101889 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 16:05:19 -0600
|
||||
Subject: [PATCH 06/11] Feature: tools: display failure reasons for
|
||||
stonith_admin fencing commands
|
||||
|
||||
Previously, stonith_admin's --fence/--unfence/--reboot options did not output
|
||||
any error message on failure. Now, they do, including the exit reason, if
|
||||
available.
|
||||
---
|
||||
tools/stonith_admin.c | 30 +++++++++++++++++++++++++-----
|
||||
1 file changed, 25 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index f738a9c888..5590faf11e 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -333,13 +333,33 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
|
||||
|
||||
// \return Standard Pacemaker return code
|
||||
static int
|
||||
-request_fencing(stonith_t *st, const char *target, const char *command)
|
||||
+request_fencing(stonith_t *st, const char *target, const char *command,
|
||||
+ GError **error)
|
||||
{
|
||||
+ char *reason = NULL;
|
||||
int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||
options.timeout * 1000,
|
||||
options.tolerance * 1000,
|
||||
- options.delay, NULL);
|
||||
+ options.delay, &reason);
|
||||
|
||||
+ if (rc != pcmk_rc_ok) {
|
||||
+ const char *rc_str = pcmk_rc_str(rc);
|
||||
+
|
||||
+ // If reason is identical to return code string, don't display it twice
|
||||
+ if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) {
|
||||
+ free(reason);
|
||||
+ reason = NULL;
|
||||
+ }
|
||||
+
|
||||
+ g_set_error(error, PCMK__RC_ERROR, rc,
|
||||
+ "Couldn't %sfence %s: %s%s%s%s",
|
||||
+ ((strcmp(command, "on") == 0)? "un" : ""),
|
||||
+ target, pcmk_rc_str(rc),
|
||||
+ ((reason == NULL)? "" : " ("),
|
||||
+ ((reason == NULL)? "" : reason),
|
||||
+ ((reason == NULL)? "" : ")"));
|
||||
+ }
|
||||
+ free(reason);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -580,15 +600,15 @@ main(int argc, char **argv)
|
||||
break;
|
||||
|
||||
case 'B':
|
||||
- rc = request_fencing(st, target, "reboot");
|
||||
+ rc = request_fencing(st, target, "reboot", &error);
|
||||
break;
|
||||
|
||||
case 'F':
|
||||
- rc = request_fencing(st, target, "off");
|
||||
+ rc = request_fencing(st, target, "off", &error);
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
- rc = request_fencing(st, target, "on");
|
||||
+ rc = request_fencing(st, target, "on", &error);
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 2d99eba4c326d3b13dbbe446971ea5febd5d05be Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Fri, 10 Dec 2021 16:08:49 -0600
|
||||
Subject: [PATCH 07/11] Feature: libpacemaker: return exit reason for fencer
|
||||
connection failures
|
||||
|
||||
... instead of outputting to stderr directly, so that the caller (i.e.
|
||||
stonith_admin) can output the error in the correct output format.
|
||||
---
|
||||
lib/pacemaker/pcmk_fence.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index 1b7feb54b2..d17b07cda2 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -104,10 +104,9 @@ async_fence_helper(gpointer user_data)
|
||||
int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
|
||||
|
||||
if (rc != pcmk_ok) {
|
||||
- fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
|
||||
g_main_loop_quit(mainloop);
|
||||
pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||
- PCMK_EXEC_NOT_CONNECTED, NULL);
|
||||
+ PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 4480ef0602f47450bdddfbde360a6a8327710927 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 17 Jan 2022 09:39:39 -0600
|
||||
Subject: [PATCH 08/11] Low: libpacemaker: compare fence action names
|
||||
case-sensitively
|
||||
|
||||
---
|
||||
lib/pacemaker/pcmk_fence.c | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index d17b07cda2..2a8f50a555 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2009-2021 the Pacemaker project contributors
|
||||
+ * Copyright 2009-2022 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -77,7 +77,7 @@ static void
|
||||
notify_callback(stonith_t * st, stonith_event_t * e)
|
||||
{
|
||||
if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
|
||||
- && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
|
||||
|
||||
pcmk__set_result(&async_fence_data.result,
|
||||
stonith__event_exit_status(e),
|
||||
@@ -549,7 +549,7 @@ pcmk__reduce_fence_history(stonith_history_t *history)
|
||||
if ((hp->state == st_done) || (hp->state == st_failed)) {
|
||||
/* action not in progress */
|
||||
if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) &&
|
||||
- pcmk__str_eq(hp->action, np->action, pcmk__str_casei) &&
|
||||
+ pcmk__str_eq(hp->action, np->action, pcmk__str_none) &&
|
||||
(hp->state == np->state) &&
|
||||
((hp->state == st_done) ||
|
||||
pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From fe4c65a3b9e715c2b535709f989f2369d3637b78 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 17 Jan 2022 09:45:24 -0600
|
||||
Subject: [PATCH 09/11] Refactor: libpacemaker: avoid unnecessary string
|
||||
duplication
|
||||
|
||||
... and don't leave any dynamic memory hanging around
|
||||
---
|
||||
lib/pacemaker/pcmk_fence.c | 11 ++++++++---
|
||||
1 file changed, 8 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||
index 2a8f50a555..260fa5ab8e 100644
|
||||
--- a/lib/pacemaker/pcmk_fence.c
|
||||
+++ b/lib/pacemaker/pcmk_fence.c
|
||||
@@ -141,6 +141,7 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
unsigned int tolerance, int delay, char **reason)
|
||||
{
|
||||
crm_trigger_t *trig;
|
||||
+ int rc = pcmk_rc_ok;
|
||||
|
||||
async_fence_data.st = st;
|
||||
async_fence_data.name = strdup(name);
|
||||
@@ -160,10 +161,14 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
|
||||
free(async_fence_data.name);
|
||||
|
||||
- if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
|
||||
- *reason = strdup(async_fence_data.result.exit_reason);
|
||||
+ if (reason != NULL) {
|
||||
+ // Give the caller ownership of the exit reason
|
||||
+ *reason = async_fence_data.result.exit_reason;
|
||||
+ async_fence_data.result.exit_reason = NULL;
|
||||
}
|
||||
- return stonith__result2rc(&async_fence_data.result);
|
||||
+ rc = stonith__result2rc(&async_fence_data.result);
|
||||
+ pcmk__reset_result(&async_fence_data.result);
|
||||
+ return rc;
|
||||
}
|
||||
|
||||
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 7b7af07796f05a1adabdac655582be2e17106f81 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 17 Jan 2022 10:07:10 -0600
|
||||
Subject: [PATCH 10/11] Doc: libpacemaker: improve pcmk__request_fencing()
|
||||
doxygen block
|
||||
|
||||
---
|
||||
include/pacemaker.h | 6 ++++--
|
||||
include/pcmki/pcmki_fence.h | 15 +++++++++------
|
||||
2 files changed, 13 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||
index e581f975a9..266a844892 100644
|
||||
--- a/include/pacemaker.h
|
||||
+++ b/include/pacemaker.h
|
||||
@@ -187,8 +187,10 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||
* \param[in] tolerance If a successful action for \p target happened within
|
||||
* this many ms, return 0 without performing the action
|
||||
* again
|
||||
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||
- * static/random fencing delays from pcmk_delay_base/max
|
||||
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
|
||||
+ * fencing action (a value of -1 applies no delay and also
|
||||
+ * disables any fencing delay from pcmk_delay_base and
|
||||
+ * pcmk_delay_max)
|
||||
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||
*
|
||||
* \return Standard Pacemaker return code
|
||||
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||
index e3a7e27264..4a2fe3c481 100644
|
||||
--- a/include/pcmki/pcmki_fence.h
|
||||
+++ b/include/pcmki/pcmki_fence.h
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2019-2021 the Pacemaker project contributors
|
||||
+ * Copyright 2019-2022 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -22,17 +22,20 @@
|
||||
* \param[in] target The node that should be fenced
|
||||
* \param[in] action The fencing action (on, off, reboot) to perform
|
||||
* \param[in] name Who requested the fence action?
|
||||
- * \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
|
||||
* \param[in] tolerance If a successful action for \p target happened within
|
||||
- * this many ms, return 0 without performing the action
|
||||
- * again.
|
||||
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||
- * static/random fencing delays from pcmk_delay_base/max
|
||||
+ * this many milliseconds, return success without
|
||||
+ * performing the action again
|
||||
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
|
||||
+ * fencing action (a value of -1 applies no delay and also
|
||||
+ * disables any fencing delay from pcmk_delay_base and
|
||||
+ * pcmk_delay_max)
|
||||
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||
*
|
||||
* \return Standard Pacemaker return code
|
||||
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||
* returned value.
|
||||
+ * \todo delay is eventually used with g_timeout_add() and should be guint
|
||||
*/
|
||||
int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||
const char *name, unsigned int timeout,
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 61fb7271712e1246eb6d9472dc1afc7cd10e0a79 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 17 Jan 2022 10:18:02 -0600
|
||||
Subject: [PATCH 11/11] Fix: tools: get stonith_admin -T option working again
|
||||
|
||||
Regression introduced in 2.0.3 by 3910b6fec
|
||||
|
||||
This reverts commit 247eb303df934944c0b72b162bb661cee6e0ed8b
|
||||
("Refactor: tools: drop unnecessary string duplication in stonith_admin")
|
||||
and fixes a regression introduced when stonith_admin was converted to use
|
||||
GOption.
|
||||
|
||||
The -T option is intended to override the client name passed to the fencer API,
|
||||
but the client name was set to the default (crm_system_name) after option
|
||||
processing had already been done, so any value for -T was overwritten by the
|
||||
default, and its memory was leaked.
|
||||
|
||||
This commit sets the default only if -T was not used.
|
||||
---
|
||||
tools/stonith_admin.c | 15 ++++++++++-----
|
||||
1 file changed, 10 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||
index 5590faf11e..54774b6fee 100644
|
||||
--- a/tools/stonith_admin.c
|
||||
+++ b/tools/stonith_admin.c
|
||||
@@ -337,10 +337,10 @@ request_fencing(stonith_t *st, const char *target, const char *command,
|
||||
GError **error)
|
||||
{
|
||||
char *reason = NULL;
|
||||
- int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||
- options.timeout * 1000,
|
||||
- options.tolerance * 1000,
|
||||
- options.delay, &reason);
|
||||
+ int rc = pcmk__request_fencing(st, target, command, name,
|
||||
+ options.timeout * 1000,
|
||||
+ options.tolerance * 1000,
|
||||
+ options.delay, &reason);
|
||||
|
||||
if (rc != pcmk_rc_ok) {
|
||||
const char *rc_str = pcmk_rc_str(rc);
|
||||
@@ -392,6 +392,10 @@ main(int argc, char **argv)
|
||||
|
||||
pcmk__cli_init_logging("stonith_admin", args->verbosity);
|
||||
|
||||
+ if (name == NULL) {
|
||||
+ name = strdup(crm_system_name);
|
||||
+ }
|
||||
+
|
||||
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
|
||||
if (rc != pcmk_rc_ok) {
|
||||
exit_code = CRM_EX_ERROR;
|
||||
@@ -526,7 +530,7 @@ main(int argc, char **argv)
|
||||
if (st == NULL) {
|
||||
rc = -ENOMEM;
|
||||
} else if (!no_connect) {
|
||||
- rc = st->cmds->connect(st, crm_system_name, NULL);
|
||||
+ rc = st->cmds->connect(st, name, NULL);
|
||||
}
|
||||
if (rc < 0) {
|
||||
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
|
||||
@@ -640,6 +644,7 @@ main(int argc, char **argv)
|
||||
out->finish(out, exit_code, true, NULL);
|
||||
pcmk__output_free(out);
|
||||
}
|
||||
+ free(name);
|
||||
stonith_key_value_freeall(options.params, 1, 1);
|
||||
|
||||
if (st != NULL) {
|
||||
--
|
||||
2.27.0
|
||||
|
796
SOURCES/018-failure-messages.patch
Normal file
796
SOURCES/018-failure-messages.patch
Normal file
@ -0,0 +1,796 @@
|
||||
From 08c3420f2c857e7b27cd960f355d787af534da7d Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 18 Jan 2022 16:04:49 -0600
|
||||
Subject: [PATCH 01/12] Log: libcrmcommon: improve description for "not
|
||||
connected" status
|
||||
|
||||
PCMK_EXEC_NOT_CONNECTED was originally added to represent "No executor
|
||||
connection", but it can also now mean no fencer connection, so change it to
|
||||
"Internal communication failure" which is probably less mysterious to end users
|
||||
anyway (especially since it should be accompanied by a more descriptive exit
|
||||
reason).
|
||||
---
|
||||
include/crm/common/results.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
|
||||
index 873faf5c43..3d322a7ce6 100644
|
||||
--- a/include/crm/common/results.h
|
||||
+++ b/include/crm/common/results.h
|
||||
@@ -349,7 +349,7 @@ pcmk_exec_status_str(enum pcmk_exec_status status)
|
||||
case PCMK_EXEC_ERROR_HARD: return "Hard error";
|
||||
case PCMK_EXEC_ERROR_FATAL: return "Fatal error";
|
||||
case PCMK_EXEC_NOT_INSTALLED: return "Not installed";
|
||||
- case PCMK_EXEC_NOT_CONNECTED: return "No executor connection";
|
||||
+ case PCMK_EXEC_NOT_CONNECTED: return "Internal communication failure";
|
||||
case PCMK_EXEC_INVALID: return "Cannot execute now";
|
||||
case PCMK_EXEC_NO_FENCE_DEVICE: return "No fence device";
|
||||
case PCMK_EXEC_NO_SECRETS: return "CIB secrets unavailable";
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 7c345cf8cf0cb054f5634206880df035bfef7311 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 15:12:36 -0600
|
||||
Subject: [PATCH 02/12] Refactor: libcrmcommon: drop unnecessary system error
|
||||
redefinitions
|
||||
|
||||
portability.h defines some system error codes that might not be present on
|
||||
non-Linux systems.
|
||||
|
||||
This was a bad idea, since there's no way to ensure the defined values don't
|
||||
conflict with existing system codes. However, we use a number of them, so it's
|
||||
probably best to keep them, at least until we can make a backward compatibility
|
||||
break.
|
||||
|
||||
However, we don't use EUNATCH, ENOSR, or ENOSTR, so we can delete those.
|
||||
---
|
||||
include/portability.h | 12 ------------
|
||||
lib/common/results.c | 9 ++++++---
|
||||
2 files changed, 6 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/include/portability.h b/include/portability.h
|
||||
index 9a60c583a7..ee065a376d 100644
|
||||
--- a/include/portability.h
|
||||
+++ b/include/portability.h
|
||||
@@ -131,10 +131,6 @@ typedef union
|
||||
# define EREMOTEIO 193
|
||||
# endif
|
||||
|
||||
-# ifndef EUNATCH
|
||||
-# define EUNATCH 194
|
||||
-# endif
|
||||
-
|
||||
# ifndef ENOKEY
|
||||
# define ENOKEY 195
|
||||
# endif
|
||||
@@ -147,14 +143,6 @@ typedef union
|
||||
# define ETIME 197
|
||||
# endif
|
||||
|
||||
-# ifndef ENOSR
|
||||
-# define ENOSR 198
|
||||
-# endif
|
||||
-
|
||||
-# ifndef ENOSTR
|
||||
-# define ENOSTR 199
|
||||
-# endif
|
||||
-
|
||||
# ifndef EKEYREJECTED
|
||||
# define EKEYREJECTED 200
|
||||
# endif
|
||||
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||
index 6d120694cd..96cd4e5659 100644
|
||||
--- a/lib/common/results.c
|
||||
+++ b/lib/common/results.c
|
||||
@@ -118,9 +118,6 @@ pcmk_strerror(int rc)
|
||||
case EREMOTEIO:
|
||||
return "Remote I/O error";
|
||||
/* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case EUNATCH:
|
||||
- return "Protocol driver not attached";
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
case ENOKEY:
|
||||
return "Required key not available";
|
||||
}
|
||||
@@ -342,8 +339,12 @@ pcmk_rc_name(int rc)
|
||||
case ENOMSG: return "ENOMSG";
|
||||
case ENOPROTOOPT: return "ENOPROTOOPT";
|
||||
case ENOSPC: return "ENOSPC";
|
||||
+#ifdef ENOSR
|
||||
case ENOSR: return "ENOSR";
|
||||
+#endif
|
||||
+#ifdef ENOSTR
|
||||
case ENOSTR: return "ENOSTR";
|
||||
+#endif
|
||||
case ENOSYS: return "ENOSYS";
|
||||
case ENOTBLK: return "ENOTBLK";
|
||||
case ENOTCONN: return "ENOTCONN";
|
||||
@@ -376,7 +377,9 @@ pcmk_rc_name(int rc)
|
||||
case ETIME: return "ETIME";
|
||||
case ETIMEDOUT: return "ETIMEDOUT";
|
||||
case ETXTBSY: return "ETXTBSY";
|
||||
+#ifdef EUNATCH
|
||||
case EUNATCH: return "EUNATCH";
|
||||
+#endif
|
||||
case EUSERS: return "EUSERS";
|
||||
/* case EWOULDBLOCK: return "EWOULDBLOCK"; */
|
||||
case EXDEV: return "EXDEV";
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From eac8d1ca51eac3f437e18584f7e013d976ecee2c Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 15:33:12 -0600
|
||||
Subject: [PATCH 03/12] Log: libcrmcommon: improve handling of portability.h
|
||||
error codes
|
||||
|
||||
portability.h defines some system error codes that might not be present on
|
||||
non-Linux systems.
|
||||
|
||||
Define a constant for each one (for example, PCMK__ECOMM for ECOMM) when
|
||||
the system doesn't have the value, so we can detect that when relevant.
|
||||
|
||||
Also, make sure pcmk_rc_name() and pcmk_rc_str() handle all of these values.
|
||||
---
|
||||
include/portability.h | 8 ++++++++
|
||||
lib/common/results.c | 32 ++++++++++++++++++++++++++++++--
|
||||
2 files changed, 38 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/include/portability.h b/include/portability.h
|
||||
index ee065a376d..5d5fbf21cb 100644
|
||||
--- a/include/portability.h
|
||||
+++ b/include/portability.h
|
||||
@@ -116,34 +116,42 @@ typedef union
|
||||
# include <errno.h>
|
||||
|
||||
# ifndef ENOTUNIQ
|
||||
+# define PCMK__ENOTUNIQ
|
||||
# define ENOTUNIQ 190
|
||||
# endif
|
||||
|
||||
# ifndef ECOMM
|
||||
+# define PCMK__ECOMM
|
||||
# define ECOMM 191
|
||||
# endif
|
||||
|
||||
# ifndef ELIBACC
|
||||
+# define PCMK__ELIBACC
|
||||
# define ELIBACC 192
|
||||
# endif
|
||||
|
||||
# ifndef EREMOTEIO
|
||||
+# define PCMK__EREMOTEIO
|
||||
# define EREMOTEIO 193
|
||||
# endif
|
||||
|
||||
# ifndef ENOKEY
|
||||
+# define PCMK__ENOKEY
|
||||
# define ENOKEY 195
|
||||
# endif
|
||||
|
||||
# ifndef ENODATA
|
||||
+# define PCMK__ENODATA
|
||||
# define ENODATA 196
|
||||
# endif
|
||||
|
||||
# ifndef ETIME
|
||||
+# define PCMK__ETIME
|
||||
# define ETIME 197
|
||||
# endif
|
||||
|
||||
# ifndef EKEYREJECTED
|
||||
+# define PCMK__EKEYREJECTED
|
||||
# define EKEYREJECTED 200
|
||||
# endif
|
||||
|
||||
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||
index 96cd4e5659..bcf289d0d6 100644
|
||||
--- a/lib/common/results.c
|
||||
+++ b/lib/common/results.c
|
||||
@@ -395,9 +395,9 @@ pcmk_rc_name(int rc)
|
||||
#ifdef EISNAM // Not available on OS X, Illumos, Solaris
|
||||
case EISNAM: return "EISNAM";
|
||||
case EKEYEXPIRED: return "EKEYEXPIRED";
|
||||
- case EKEYREJECTED: return "EKEYREJECTED";
|
||||
case EKEYREVOKED: return "EKEYREVOKED";
|
||||
#endif
|
||||
+ case EKEYREJECTED: return "EKEYREJECTED";
|
||||
case EL2HLT: return "EL2HLT";
|
||||
case EL2NSYNC: return "EL2NSYNC";
|
||||
case EL3HLT: return "EL3HLT";
|
||||
@@ -443,7 +443,35 @@ pcmk_rc_str(int rc)
|
||||
if (rc < 0) {
|
||||
return "Unknown error";
|
||||
}
|
||||
- return strerror(rc);
|
||||
+
|
||||
+ // Handle values that could be defined by system or by portability.h
|
||||
+ switch (rc) {
|
||||
+#ifdef PCMK__ENOTUNIQ
|
||||
+ case ENOTUNIQ: return "Name not unique on network";
|
||||
+#endif
|
||||
+#ifdef PCMK__ECOMM
|
||||
+ case ECOMM: return "Communication error on send";
|
||||
+#endif
|
||||
+#ifdef PCMK__ELIBACC
|
||||
+ case ELIBACC: return "Can not access a needed shared library";
|
||||
+#endif
|
||||
+#ifdef PCMK__EREMOTEIO
|
||||
+ case EREMOTEIO: return "Remote I/O error";
|
||||
+#endif
|
||||
+#ifdef PCMK__ENOKEY
|
||||
+ case ENOKEY: return "Required key not available";
|
||||
+#endif
|
||||
+#ifdef PCMK__ENODATA
|
||||
+ case ENODATA: return "No data available";
|
||||
+#endif
|
||||
+#ifdef PCMK__ETIME
|
||||
+ case ETIME: return "Timer expired";
|
||||
+#endif
|
||||
+#ifdef PCMK__EKEYREJECTED
|
||||
+ case EKEYREJECTED: return "Key was rejected by service";
|
||||
+#endif
|
||||
+ default: return strerror(rc);
|
||||
+ }
|
||||
}
|
||||
|
||||
// This returns negative values for errors
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 32a38ac6374f85c43e7f4051f5e519822cc481e6 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 15:39:19 -0600
|
||||
Subject: [PATCH 04/12] Log: libcrmcommon: redefine pcmk_strerror() in terms of
|
||||
pcmk_rc_str()
|
||||
|
||||
... to reduce code duplication. This causes minor differences in the string for
|
||||
a few values.
|
||||
---
|
||||
lib/common/results.c | 67 +-------------------------------------------
|
||||
1 file changed, 1 insertion(+), 66 deletions(-)
|
||||
|
||||
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||
index bcf289d0d6..b2c6e8d553 100644
|
||||
--- a/lib/common/results.c
|
||||
+++ b/lib/common/results.c
|
||||
@@ -57,72 +57,7 @@ pcmk_errorname(int rc)
|
||||
const char *
|
||||
pcmk_strerror(int rc)
|
||||
{
|
||||
- if (rc == 0) {
|
||||
- return "OK";
|
||||
- }
|
||||
-
|
||||
- rc = abs(rc);
|
||||
-
|
||||
- // Of course rc > 0 ... unless someone passed INT_MIN as rc
|
||||
- if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) {
|
||||
- return strerror(rc);
|
||||
- }
|
||||
-
|
||||
- switch (rc) {
|
||||
- case pcmk_err_generic:
|
||||
- return "Generic Pacemaker error";
|
||||
- case pcmk_err_no_quorum:
|
||||
- return "Operation requires quorum";
|
||||
- case pcmk_err_schema_validation:
|
||||
- return "Update does not conform to the configured schema";
|
||||
- case pcmk_err_transform_failed:
|
||||
- return "Schema transform failed";
|
||||
- case pcmk_err_old_data:
|
||||
- return "Update was older than existing configuration";
|
||||
- case pcmk_err_diff_failed:
|
||||
- return "Application of an update diff failed";
|
||||
- case pcmk_err_diff_resync:
|
||||
- return "Application of an update diff failed, requesting a full refresh";
|
||||
- case pcmk_err_cib_modified:
|
||||
- return "The on-disk configuration was manually modified";
|
||||
- case pcmk_err_cib_backup:
|
||||
- return "Could not archive the previous configuration";
|
||||
- case pcmk_err_cib_save:
|
||||
- return "Could not save the new configuration to disk";
|
||||
- case pcmk_err_cib_corrupt:
|
||||
- return "Could not parse on-disk configuration";
|
||||
- case pcmk_err_multiple:
|
||||
- return "Resource active on multiple nodes";
|
||||
- case pcmk_err_node_unknown:
|
||||
- return "Node not found";
|
||||
- case pcmk_err_already:
|
||||
- return "Situation already as requested";
|
||||
- case pcmk_err_bad_nvpair:
|
||||
- return "Bad name/value pair given";
|
||||
- case pcmk_err_schema_unchanged:
|
||||
- return "Schema is already the latest available";
|
||||
- case pcmk_err_unknown_format:
|
||||
- return "Unknown output format";
|
||||
-
|
||||
- /* The following cases will only be hit on systems for which they are non-standard */
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case ENOTUNIQ:
|
||||
- return "Name not unique on network";
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case ECOMM:
|
||||
- return "Communication error on send";
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case ELIBACC:
|
||||
- return "Can not access a needed shared library";
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case EREMOTEIO:
|
||||
- return "Remote I/O error";
|
||||
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||
- case ENOKEY:
|
||||
- return "Required key not available";
|
||||
- }
|
||||
- crm_err("Unknown error code: %d", rc);
|
||||
- return "Unknown error";
|
||||
+ return pcmk_rc_str(pcmk_legacy2rc(rc));
|
||||
}
|
||||
|
||||
// Standard Pacemaker API return codes
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 7c331d7e2275ffebbfd5e2f6432a6137a66ee5db Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 15:41:24 -0600
|
||||
Subject: [PATCH 05/12] Log: libcrmcommon: don't say "Unknown error"
|
||||
|
||||
... which is unhelpful and annoying to users
|
||||
---
|
||||
lib/common/results.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||
index b2c6e8d553..5ffac76549 100644
|
||||
--- a/lib/common/results.c
|
||||
+++ b/lib/common/results.c
|
||||
@@ -376,7 +376,7 @@ pcmk_rc_str(int rc)
|
||||
return pcmk__rcs[pcmk_rc_error - rc].desc;
|
||||
}
|
||||
if (rc < 0) {
|
||||
- return "Unknown error";
|
||||
+ return "Error";
|
||||
}
|
||||
|
||||
// Handle values that could be defined by system or by portability.h
|
||||
@@ -768,7 +768,7 @@ bz2_strerror(int rc)
|
||||
case BZ_OUTBUFF_FULL:
|
||||
return "output data will not fit into the buffer provided";
|
||||
}
|
||||
- return "Unknown error";
|
||||
+ return "Data compression error";
|
||||
}
|
||||
|
||||
crm_exit_t
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 26883b4edda7d81bfcb79bd7b33bb3210beff110 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 16:01:39 -0600
|
||||
Subject: [PATCH 06/12] Log: fencing: don't warn if cluster has no watchdog
|
||||
device
|
||||
|
||||
---
|
||||
lib/fencing/st_client.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||||
index b1de912b2a..a0f3119f3b 100644
|
||||
--- a/lib/fencing/st_client.c
|
||||
+++ b/lib/fencing/st_client.c
|
||||
@@ -187,7 +187,12 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
|
||||
* we drop in here - so as not to make remote nodes
|
||||
* panic on that answer
|
||||
*/
|
||||
- crm_warn("watchdog-fencing-query failed");
|
||||
+ if (rc == -ENODEV) {
|
||||
+ crm_notice("Cluster does not have watchdog fencing device");
|
||||
+ } else {
|
||||
+ crm_warn("Could not check for watchdog fencing device: %s",
|
||||
+ pcmk_strerror(rc));
|
||||
+ }
|
||||
} else if (list[0] == '\0') {
|
||||
rv = TRUE;
|
||||
} else {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 72b3c42232deaca64ffba9582598c59331203761 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Mon, 20 Dec 2021 16:22:49 -0600
|
||||
Subject: [PATCH 07/12] Test: libcrmcommon: update pcmk_rc_str() unit test for
|
||||
recent change
|
||||
|
||||
---
|
||||
lib/common/tests/results/pcmk__results_test.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/common/tests/results/pcmk__results_test.c b/lib/common/tests/results/pcmk__results_test.c
|
||||
index 57a520c501..e08d4b6261 100644
|
||||
--- a/lib/common/tests/results/pcmk__results_test.c
|
||||
+++ b/lib/common/tests/results/pcmk__results_test.c
|
||||
@@ -30,7 +30,7 @@ static void
|
||||
test_for_pcmk_rc_str(void **state) {
|
||||
assert_string_equal(pcmk_rc_str(pcmk_rc_error-1), "Unknown output format");
|
||||
assert_string_equal(pcmk_rc_str(pcmk_rc_ok), "OK");
|
||||
- assert_string_equal(pcmk_rc_str(-1), "Unknown error");
|
||||
+ assert_string_equal(pcmk_rc_str(-1), "Error");
|
||||
}
|
||||
|
||||
static void
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From c1ad3d6640f695321a83183c95fae2f105adc429 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 21 Dec 2021 10:20:38 -0600
|
||||
Subject: [PATCH 08/12] Test: cts-lab: update expected patterns for recent
|
||||
changes
|
||||
|
||||
---
|
||||
cts/lab/CTStests.py | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
|
||||
index 62c832eb45..f4be998cfb 100644
|
||||
--- a/cts/lab/CTStests.py
|
||||
+++ b/cts/lab/CTStests.py
|
||||
@@ -3055,7 +3055,7 @@ class RemoteStonithd(RemoteDriver):
|
||||
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
|
||||
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
|
||||
r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
|
||||
- r"error: Result of monitor operation for .* on remote-.*: No executor connection",
|
||||
+ r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
|
||||
]
|
||||
|
||||
ignore_pats.extend(RemoteDriver.errorstoignore(self))
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From f272e2f526633c707e894b39c7c7bce3c14de898 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 21 Dec 2021 15:40:49 -0600
|
||||
Subject: [PATCH 09/12] Log: controller,libpacemaker: make history XML creation
|
||||
less chatty
|
||||
|
||||
Other messages with the same info will already be logged at higher severity
|
||||
---
|
||||
daemons/controld/controld_execd.c | 3 +--
|
||||
daemons/controld/controld_te_actions.c | 7 ++-----
|
||||
include/pcmki/pcmki_sched_utils.h | 3 +--
|
||||
lib/pacemaker/pcmk_injections.c | 3 +--
|
||||
lib/pacemaker/pcmk_sched_actions.c | 12 +++++-------
|
||||
5 files changed, 10 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
||||
index 15784e7687..52157fa5d4 100644
|
||||
--- a/daemons/controld/controld_execd.c
|
||||
+++ b/daemons/controld/controld_execd.c
|
||||
@@ -693,9 +693,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
|
||||
caller_version = CRM_FEATURE_SET;
|
||||
}
|
||||
|
||||
- crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
|
||||
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
|
||||
- fsa_our_uname, src, LOG_DEBUG);
|
||||
+ fsa_our_uname, src);
|
||||
if (xml_op == NULL) {
|
||||
return TRUE;
|
||||
}
|
||||
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
|
||||
index 63b7c72359..b0bcb8b2e4 100644
|
||||
--- a/daemons/controld/controld_te_actions.c
|
||||
+++ b/daemons/controld/controld_te_actions.c
|
||||
@@ -181,7 +181,6 @@ controld_record_action_timeout(crm_action_t *action)
|
||||
lrmd_event_data_t *op = NULL;
|
||||
xmlNode *state = NULL;
|
||||
xmlNode *rsc = NULL;
|
||||
- xmlNode *xml_op = NULL;
|
||||
xmlNode *action_rsc = NULL;
|
||||
|
||||
int rc = pcmk_ok;
|
||||
@@ -245,12 +244,10 @@ controld_record_action_timeout(crm_action_t *action)
|
||||
op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||
target_rc, te_uuid);
|
||||
|
||||
- xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
|
||||
- target, __func__, LOG_INFO);
|
||||
+ pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
|
||||
+ __func__);
|
||||
lrmd_free_event(op);
|
||||
|
||||
- crm_log_xml_trace(xml_op, "Action timeout");
|
||||
-
|
||||
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
|
||||
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
|
||||
free_xml(state);
|
||||
diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
|
||||
index 68d60fc7db..144424a609 100644
|
||||
--- a/include/pcmki/pcmki_sched_utils.h
|
||||
+++ b/include/pcmki/pcmki_sched_utils.h
|
||||
@@ -52,8 +52,7 @@ extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_wor
|
||||
|
||||
xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
|
||||
const char *caller_version, int target_rc,
|
||||
- const char *node, const char *origin,
|
||||
- int level);
|
||||
+ const char *node, const char *origin);
|
||||
|
||||
# define LOAD_STOPPED "load_stopped"
|
||||
|
||||
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
|
||||
index 678c3f5dd2..1aa90a5a0b 100644
|
||||
--- a/lib/pacemaker/pcmk_sched_transition.c
|
||||
+++ b/lib/pacemaker/pcmk_sched_transition.c
|
||||
@@ -201,8 +201,7 @@ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
|
||||
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
|
||||
{
|
||||
return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
|
||||
- target_rc, NULL, crm_system_name,
|
||||
- LOG_TRACE);
|
||||
+ target_rc, NULL, crm_system_name);
|
||||
}
|
||||
|
||||
static xmlNode *
|
||||
diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c
|
||||
index f8200b0efc..4f63d3374d 100644
|
||||
--- a/lib/pacemaker/pcmk_sched_utils.c
|
||||
+++ b/lib/pacemaker/pcmk_sched_utils.c
|
||||
@@ -892,14 +892,13 @@ add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
|
||||
* \param[in] target_rc Expected result of operation
|
||||
* \param[in] node Name of node on which operation was performed
|
||||
* \param[in] origin Arbitrary description of update source
|
||||
- * \param[in] level A log message will be logged at this level
|
||||
*
|
||||
* \return Newly created XML node for history update
|
||||
*/
|
||||
xmlNode *
|
||||
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
|
||||
const char *caller_version, int target_rc,
|
||||
- const char *node, const char *origin, int level)
|
||||
+ const char *node, const char *origin)
|
||||
{
|
||||
char *key = NULL;
|
||||
char *magic = NULL;
|
||||
@@ -912,11 +911,10 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
|
||||
const char *task = NULL;
|
||||
|
||||
CRM_CHECK(op != NULL, return NULL);
|
||||
- do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
|
||||
- origin, op->rsc_id, op->op_type,
|
||||
- pcmk_exec_status_str(op->op_status), op->interval_ms);
|
||||
-
|
||||
- crm_trace("DC version: %s", caller_version);
|
||||
+ crm_trace("Creating history XML for %s-interval %s action for %s on %s "
|
||||
+ "(DC version: %s, origin: %s)",
|
||||
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
|
||||
+ ((node == NULL)? "no node" : node), caller_version, origin);
|
||||
|
||||
task = op->op_type;
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 06b1da9e5345e0d1571042c11646fd7157961279 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Tue, 21 Dec 2021 17:09:44 -0600
|
||||
Subject: [PATCH 10/12] Feature: controller: improve exit reason for internal
|
||||
timeouts
|
||||
|
||||
Functionize the part of controld_record_action_timeout() that creates a fake
|
||||
executor event, into a new function synthesize_timeout_event(), and have it set
|
||||
a more detailed exit reason describing what timed out.
|
||||
---
|
||||
daemons/controld/controld_te_actions.c | 61 ++++++++++++++++++++------
|
||||
1 file changed, 48 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
|
||||
index b0bcb8b2e4..de2fbb82bf 100644
|
||||
--- a/daemons/controld/controld_te_actions.c
|
||||
+++ b/daemons/controld/controld_te_actions.c
|
||||
@@ -175,6 +175,53 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
+/*!
|
||||
+ * \internal
|
||||
+ * \brief Synthesize an executor event for a resource action timeout
|
||||
+ *
|
||||
+ * \param[in] action Resource action that timed out
|
||||
+ * \param[in] target_rc Expected result of action that timed out
|
||||
+ *
|
||||
+ * Synthesize an executor event for a resource action timeout. (If the executor
|
||||
+ * gets a timeout while waiting for a resource action to complete, that will be
|
||||
+ * reported via the usual callback. This timeout means we didn't hear from the
|
||||
+ * executor itself or the controller that relayed the action to the executor.)
|
||||
+ *
|
||||
+ * \return Newly created executor event for result of \p action
|
||||
+ * \note The caller is responsible for freeing the return value using
|
||||
+ * lrmd_free_event().
|
||||
+ */
|
||||
+static lrmd_event_data_t *
|
||||
+synthesize_timeout_event(crm_action_t *action, int target_rc)
|
||||
+{
|
||||
+ lrmd_event_data_t *op = NULL;
|
||||
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
||||
+ const char *reason = NULL;
|
||||
+ char *dynamic_reason = NULL;
|
||||
+
|
||||
+ if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
|
||||
+ reason = "Local executor did not return result in time";
|
||||
+ } else {
|
||||
+ const char *router_node = NULL;
|
||||
+
|
||||
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
|
||||
+ if (router_node == NULL) {
|
||||
+ router_node = target;
|
||||
+ }
|
||||
+ dynamic_reason = crm_strdup_printf("Controller on %s did not return "
|
||||
+ "result in time", router_node);
|
||||
+ reason = dynamic_reason;
|
||||
+ }
|
||||
+
|
||||
+ op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
|
||||
+ PCMK_OCF_UNKNOWN_ERROR, reason);
|
||||
+ op->call_id = -1;
|
||||
+ op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||
+ target_rc, te_uuid);
|
||||
+ free(dynamic_reason);
|
||||
+ return op;
|
||||
+}
|
||||
+
|
||||
void
|
||||
controld_record_action_timeout(crm_action_t *action)
|
||||
{
|
||||
@@ -231,19 +278,7 @@ controld_record_action_timeout(crm_action_t *action)
|
||||
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
|
||||
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
|
||||
|
||||
- /* If the executor gets a timeout while waiting for the action to complete,
|
||||
- * that will be reported via the usual callback. This timeout means that we
|
||||
- * didn't hear from the executor or the controller that relayed the action
|
||||
- * to the executor.
|
||||
- */
|
||||
- op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
|
||||
- PCMK_OCF_UNKNOWN_ERROR,
|
||||
- "Cluster communication timeout "
|
||||
- "(no response from executor)");
|
||||
- op->call_id = -1;
|
||||
- op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||
- target_rc, te_uuid);
|
||||
-
|
||||
+ op = synthesize_timeout_event(action, target_rc);
|
||||
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
|
||||
__func__);
|
||||
lrmd_free_event(op);
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From be620d206faefab967d4c8567d6554d10c9e72ba Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Wed, 22 Dec 2021 16:35:06 -0600
|
||||
Subject: [PATCH 11/12] Feature: fencing: improve exit reason for fencing
|
||||
timeouts
|
||||
|
||||
Troubleshooting timeouts is one of the more difficult aspects of cluster
|
||||
maintenance. We want to give as much of a hint as possible, but for fencing in
|
||||
particular it is difficult because an operation might involve multiple retries
|
||||
of multiple devices.
|
||||
|
||||
Barring another major project to track exactly which devices, retries, etc.,
|
||||
were used in a given operation, these changes in wording are probably the best
|
||||
we can do.
|
||||
---
|
||||
daemons/fenced/fenced_remote.c | 8 +++++---
|
||||
lib/fencing/st_client.c | 2 +-
|
||||
2 files changed, 6 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
||||
index 1e237150c5..6eebb7381e 100644
|
||||
--- a/daemons/fenced/fenced_remote.c
|
||||
+++ b/daemons/fenced/fenced_remote.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2009-2021 the Pacemaker project contributors
|
||||
+ * Copyright 2009-2022 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -715,8 +715,10 @@ remote_op_timeout(gpointer userdata)
|
||||
CRM_XS " id=%.8s",
|
||||
op->action, op->target, op->client_name, op->id);
|
||||
} else {
|
||||
- finalize_timed_out_op(userdata, "Fencing could not be completed "
|
||||
- "within overall timeout");
|
||||
+ finalize_timed_out_op(userdata, "Fencing did not complete within a "
|
||||
+ "total timeout based on the "
|
||||
+ "configured timeout and retries for "
|
||||
+ "any devices attempted");
|
||||
}
|
||||
return G_SOURCE_REMOVE;
|
||||
}
|
||||
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||||
index a0f3119f3b..718739b321 100644
|
||||
--- a/lib/fencing/st_client.c
|
||||
+++ b/lib/fencing/st_client.c
|
||||
@@ -906,7 +906,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
|
||||
if (msg == NULL) {
|
||||
// Fencer didn't reply in time
|
||||
pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
|
||||
- "Timeout waiting for reply from fencer");
|
||||
+ "Fencer accepted request but did not reply in time");
|
||||
CRM_LOG_ASSERT(call_id > 0);
|
||||
|
||||
} else {
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 0fe8ede2f8e838e335fe42846bdf147111ce9955 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Wed, 22 Dec 2021 17:09:09 -0600
|
||||
Subject: [PATCH 12/12] Feature: libcrmservice: improve exit reason for
|
||||
timeouts
|
||||
|
||||
The services library doesn't have enough information about an action to say
|
||||
(for example) what configuration parameters might be relevant, but we can at
|
||||
least distinguish what kind of agent timed out.
|
||||
---
|
||||
lib/services/services_linux.c | 12 +++++++++++-
|
||||
lib/services/systemd.c | 2 +-
|
||||
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
|
||||
index f15eee860e..d6aafcfe46 100644
|
||||
--- a/lib/services/services_linux.c
|
||||
+++ b/lib/services/services_linux.c
|
||||
@@ -677,9 +677,19 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
|
||||
parse_exit_reason_from_stderr(op);
|
||||
|
||||
} else if (mainloop_child_timeout(p)) {
|
||||
+ const char *reason = NULL;
|
||||
+
|
||||
+ if (op->rsc != NULL) {
|
||||
+ reason = "Resource agent did not complete in time";
|
||||
+ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
|
||||
+ pcmk__str_none)) {
|
||||
+ reason = "Fence agent did not complete in time";
|
||||
+ } else {
|
||||
+ reason = "Process did not complete in time";
|
||||
+ }
|
||||
crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
|
||||
services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
|
||||
- "Process did not exit within specified timeout");
|
||||
+ reason);
|
||||
|
||||
} else if (op->cancel) {
|
||||
/* If an in-flight recurring operation was killed because it was
|
||||
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
|
||||
index 27a3b376db..d87b287424 100644
|
||||
--- a/lib/services/systemd.c
|
||||
+++ b/lib/services/systemd.c
|
||||
@@ -995,7 +995,7 @@ systemd_timeout_callback(gpointer p)
|
||||
crm_info("%s action for systemd unit %s named '%s' timed out",
|
||||
op->action, op->agent, op->rsc);
|
||||
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
|
||||
- "Systemd action did not complete within specified timeout");
|
||||
+ "Systemd unit action did not complete in time");
|
||||
services__finalize_async_op(op);
|
||||
return FALSE;
|
||||
}
|
||||
--
|
||||
2.27.0
|
||||
|
29
SOURCES/019-corosync-tracking.patch
Normal file
29
SOURCES/019-corosync-tracking.patch
Normal file
@ -0,0 +1,29 @@
|
||||
From e8bf0161b872267f1bb7143a9866fdc15ec218f2 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Tue, 18 Jan 2022 16:35:24 +0100
|
||||
Subject: [PATCH] Fix: corosync: Repeat corosync_cfg_trackstart
|
||||
|
||||
corosync_cfg_trackstart can fail with CS_ERR_TRY_AGAIN, so (as is
|
||||
already done for corosync_cfg_local_get, ...) handle that failure by
|
||||
retrying with the cs_repeat macro.
|
||||
---
|
||||
daemons/pacemakerd/pcmkd_corosync.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
|
||||
index 7990bc43c5..cd7a40321d 100644
|
||||
--- a/daemons/pacemakerd/pcmkd_corosync.c
|
||||
+++ b/daemons/pacemakerd/pcmkd_corosync.c
|
||||
@@ -186,7 +186,8 @@ cluster_connect_cfg(void)
|
||||
crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
|
||||
|
||||
#ifdef HAVE_COROSYNC_CFG_TRACKSTART
|
||||
- rc = corosync_cfg_trackstart(cfg_handle, 0);
|
||||
+ retries = 0;
|
||||
+ cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
|
||||
if (rc != CS_OK) {
|
||||
crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
|
||||
cs_strerror(rc), rc);
|
||||
--
|
||||
2.27.0
|
||||
|
41
SOURCES/020-systemd-unit.patch
Normal file
41
SOURCES/020-systemd-unit.patch
Normal file
@ -0,0 +1,41 @@
|
||||
From e316840a7e1d2a72e3089ee194334244c959905a Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Wed, 19 Jan 2022 09:53:53 -0600
|
||||
Subject: [PATCH] Fix: pacemakerd: tweak systemd unit respawn settings
|
||||
|
||||
If pacemaker exits immediately after starting, wait 1 second before trying to
|
||||
respawn, since the default of 100ms is a bit aggressive for a Pacemaker
|
||||
cluster.
|
||||
|
||||
Also, allow 5 attempts in 25 seconds before giving up.
|
||||
---
|
||||
daemons/pacemakerd/pacemaker.service.in | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
|
||||
index 0363a2259c..3fd53d9ffb 100644
|
||||
--- a/daemons/pacemakerd/pacemaker.service.in
|
||||
+++ b/daemons/pacemakerd/pacemaker.service.in
|
||||
@@ -31,6 +31,9 @@ After=rsyslog.service
|
||||
After=corosync.service
|
||||
Requires=corosync.service
|
||||
|
||||
+# If Pacemaker respawns repeatedly, give up after this many tries in this time
|
||||
+StartLimitBurst=5
|
||||
+StartLimitIntervalSec=25s
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -57,6 +60,9 @@ TasksMax=infinity
|
||||
# resource. Sending -KILL will just get the node fenced
|
||||
SendSIGKILL=no
|
||||
|
||||
+# Systemd's default of respawning a failed service after 100ms is too aggressive
|
||||
+RestartSec=1s
|
||||
+
|
||||
# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
|
||||
# admin wants to stop the cluster while pacemakerd is not running, it
|
||||
# might be a good idea to enable the ExecStopPost directive below.
|
||||
--
|
||||
2.27.0
|
||||
|
354
SOURCES/021-daemon-tracking.patch
Normal file
354
SOURCES/021-daemon-tracking.patch
Normal file
@ -0,0 +1,354 @@
|
||||
From 9ee9fd6b98d8a5ff5eac57a14cbc0ce1009b10e4 Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Thu, 18 Nov 2021 13:23:34 +0100
|
||||
Subject: [PATCH 1/2] Feature: pacemakerd: keep tracking pacemakerd for
|
||||
liveness
|
||||
|
||||
---
|
||||
daemons/pacemakerd/pacemakerd.c | 2 +
|
||||
daemons/pacemakerd/pacemakerd.h | 3 +-
|
||||
daemons/pacemakerd/pcmkd_messages.c | 6 +-
|
||||
daemons/pacemakerd/pcmkd_subdaemons.c | 139 +++++++++++++++++---------
|
||||
4 files changed, 98 insertions(+), 52 deletions(-)
|
||||
|
||||
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
|
||||
index 34d64c4053..062c2d5326 100644
|
||||
--- a/daemons/pacemakerd/pacemakerd.c
|
||||
+++ b/daemons/pacemakerd/pacemakerd.c
|
||||
@@ -259,6 +259,8 @@ main(int argc, char **argv)
|
||||
pcmk_ipc_api_t *old_instance = NULL;
|
||||
qb_ipcs_service_t *ipcs = NULL;
|
||||
|
||||
+ subdaemon_check_progress = time(NULL);
|
||||
+
|
||||
crm_log_preinit(NULL, argc, argv);
|
||||
mainloop_add_signal(SIGHUP, pcmk_ignore);
|
||||
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
|
||||
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
|
||||
index 7c541bbf9e..424dbbcc5d 100644
|
||||
--- a/daemons/pacemakerd/pacemakerd.h
|
||||
+++ b/daemons/pacemakerd/pacemakerd.h
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2010-2021 the Pacemaker project contributors
|
||||
+ * Copyright 2010-2022 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -21,6 +21,7 @@ extern unsigned int shutdown_complete_state_reported_to;
|
||||
extern gboolean shutdown_complete_state_reported_client_closed;
|
||||
extern crm_trigger_t *shutdown_trigger;
|
||||
extern crm_trigger_t *startup_trigger;
|
||||
+extern time_t subdaemon_check_progress;
|
||||
|
||||
gboolean mcp_read_config(void);
|
||||
|
||||
diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c
|
||||
index 0439986ecf..f2cddc353e 100644
|
||||
--- a/daemons/pacemakerd/pcmkd_messages.c
|
||||
+++ b/daemons/pacemakerd/pcmkd_messages.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2010-2021 the Pacemaker project contributors
|
||||
+ * Copyright 2010-2022 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -25,7 +25,6 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
|
||||
const char *value = NULL;
|
||||
xmlNode *ping = NULL;
|
||||
xmlNode *reply = NULL;
|
||||
- time_t pinged = time(NULL);
|
||||
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
|
||||
|
||||
/* Pinged for status */
|
||||
@@ -36,7 +35,8 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
|
||||
value = crm_element_value(msg, F_CRM_SYS_TO);
|
||||
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
|
||||
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
|
||||
- crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
|
||||
+ crm_xml_add_ll(ping, XML_ATTR_TSTAMP,
|
||||
+ (long long) subdaemon_check_progress);
|
||||
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
|
||||
reply = create_reply(msg, ping);
|
||||
free_xml(ping);
|
||||
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
index a54fcce1ba..c03903c99e 100644
|
||||
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
@@ -32,14 +32,16 @@ typedef struct pcmk_child_s {
|
||||
const char *command;
|
||||
const char *endpoint; /* IPC server name */
|
||||
bool needs_cluster;
|
||||
+ int check_count;
|
||||
|
||||
/* Anything below here will be dynamically initialized */
|
||||
bool needs_retry;
|
||||
bool active_before_startup;
|
||||
} pcmk_child_t;
|
||||
|
||||
-#define PCMK_PROCESS_CHECK_INTERVAL 5
|
||||
-#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
|
||||
+#define PCMK_PROCESS_CHECK_INTERVAL 1
|
||||
+#define PCMK_PROCESS_CHECK_RETRIES 5
|
||||
+#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
|
||||
|
||||
/* Index into the array below */
|
||||
#define PCMK_CHILD_CONTROLD 5
|
||||
@@ -82,6 +84,7 @@ static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
|
||||
|
||||
crm_trigger_t *shutdown_trigger = NULL;
|
||||
crm_trigger_t *startup_trigger = NULL;
|
||||
+time_t subdaemon_check_progress = 0;
|
||||
|
||||
/* When contacted via pacemakerd-api by a client having sbd in
|
||||
* the name we assume it is sbd-daemon which wants to know
|
||||
@@ -103,7 +106,6 @@ gboolean running_with_sbd = FALSE; /* local copy */
|
||||
GMainLoop *mainloop = NULL;
|
||||
|
||||
static gboolean fatal_error = FALSE;
|
||||
-static bool global_keep_tracking = false;
|
||||
|
||||
static gboolean check_active_before_startup_processes(gpointer user_data);
|
||||
static int child_liveness(pcmk_child_t *child);
|
||||
@@ -127,44 +129,94 @@ pcmkd_cluster_connected(void)
|
||||
static gboolean
|
||||
check_active_before_startup_processes(gpointer user_data)
|
||||
{
|
||||
- gboolean keep_tracking = FALSE;
|
||||
-
|
||||
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
|
||||
- if (!pcmk_children[i].active_before_startup) {
|
||||
- /* we are already tracking it as a child process. */
|
||||
- continue;
|
||||
- } else {
|
||||
- int rc = child_liveness(&pcmk_children[i]);
|
||||
-
|
||||
- switch (rc) {
|
||||
- case pcmk_rc_ok:
|
||||
- break;
|
||||
- case pcmk_rc_ipc_unresponsive:
|
||||
- case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||
- if (pcmk_children[i].respawn) {
|
||||
- crm_err("%s[%lld] terminated%s", pcmk_children[i].name,
|
||||
- (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
|
||||
- (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
|
||||
- } else {
|
||||
- /* orderly shutdown */
|
||||
- crm_notice("%s[%lld] terminated%s", pcmk_children[i].name,
|
||||
- (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
|
||||
- (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
|
||||
- }
|
||||
- pcmk_process_exit(&(pcmk_children[i]));
|
||||
- continue;
|
||||
- default:
|
||||
- crm_exit(CRM_EX_FATAL);
|
||||
- break; /* static analysis/noreturn */
|
||||
+ static int next_child = 0;
|
||||
+ int rc = child_liveness(&pcmk_children[next_child]);
|
||||
+
|
||||
+ crm_trace("%s[%lld] checked as %d",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid),
|
||||
+ rc);
|
||||
+
|
||||
+ switch (rc) {
|
||||
+ case pcmk_rc_ok:
|
||||
+ pcmk_children[next_child].check_count = 0;
|
||||
+ next_child++;
|
||||
+ subdaemon_check_progress = time(NULL);
|
||||
+ break;
|
||||
+ case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||
+ pcmk_children[next_child].check_count++;
|
||||
+ if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
|
||||
+ crm_err("%s[%lld] is unresponsive to ipc after %d tries but "
|
||||
+ "we found the pid so have it killed that we can restart",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid),
|
||||
+ pcmk_children[next_child].check_count);
|
||||
+ stop_child(&pcmk_children[next_child], SIGKILL);
|
||||
+ if (pcmk_children[next_child].respawn) {
|
||||
+ /* as long as the respawn-limit isn't reached
|
||||
+ give it another round of check retries
|
||||
+ */
|
||||
+ pcmk_children[next_child].check_count = 0;
|
||||
+ }
|
||||
+ } else {
|
||||
+ crm_notice("%s[%lld] is unresponsive to ipc after %d tries",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid),
|
||||
+ pcmk_children[next_child].check_count);
|
||||
+ if (pcmk_children[next_child].respawn) {
|
||||
+ /* as long as the respawn-limit isn't reached
|
||||
+ and we haven't run out of connect retries
|
||||
+ we account this as progress we are willing
|
||||
+ to tell to sbd
|
||||
+ */
|
||||
+ subdaemon_check_progress = time(NULL);
|
||||
+ }
|
||||
}
|
||||
- }
|
||||
- /* at least one of the processes found at startup
|
||||
- * is still going, so keep this recurring timer around */
|
||||
- keep_tracking = TRUE;
|
||||
+ /* go to the next child and see if
|
||||
+ we can make progress there
|
||||
+ */
|
||||
+ next_child++;
|
||||
+ break;
|
||||
+ case pcmk_rc_ipc_unresponsive:
|
||||
+ if (pcmk_children[next_child].respawn) {
|
||||
+ crm_err("%s[%lld] terminated",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid));
|
||||
+ } else {
|
||||
+ /* orderly shutdown */
|
||||
+ crm_notice("%s[%lld] terminated",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid));
|
||||
+ }
|
||||
+ pcmk_process_exit(&(pcmk_children[next_child]));
|
||||
+ if (!pcmk_children[next_child].respawn) {
|
||||
+ /* if a subdaemon is down and we don't want it
|
||||
+ to be restarted this is a success during
|
||||
+ shutdown. if it isn't restarted anymore
|
||||
+ due to MAX_RESPAWN it is
|
||||
+ rather no success.
|
||||
+ */
|
||||
+ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||
+ subdaemon_check_progress = time(NULL);
|
||||
+ }
|
||||
+ next_child++;
|
||||
+ }
|
||||
+ break;
|
||||
+ default:
|
||||
+ crm_exit(CRM_EX_FATAL);
|
||||
+ break; /* static analysis/noreturn */
|
||||
}
|
||||
|
||||
- global_keep_tracking = keep_tracking;
|
||||
- return keep_tracking;
|
||||
+ if (next_child >= PCMK__NELEM(pcmk_children)) {
|
||||
+ next_child = 0;
|
||||
+ }
|
||||
+
|
||||
+ return G_SOURCE_CONTINUE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
@@ -257,11 +309,6 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||
child->name, child->endpoint);
|
||||
/* need to monitor how it evolves, and start new process if badly */
|
||||
child->active_before_startup = true;
|
||||
- if (!global_keep_tracking) {
|
||||
- global_keep_tracking = true;
|
||||
- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||
- check_active_before_startup_processes, NULL);
|
||||
- }
|
||||
|
||||
} else {
|
||||
if (child->needs_cluster && !pcmkd_cluster_connected()) {
|
||||
@@ -648,7 +695,6 @@ child_liveness(pcmk_child_t *child)
|
||||
int
|
||||
find_and_track_existing_processes(void)
|
||||
{
|
||||
- bool tracking = false;
|
||||
bool wait_in_progress;
|
||||
int rc;
|
||||
size_t i, rounds;
|
||||
@@ -716,7 +762,6 @@ find_and_track_existing_processes(void)
|
||||
pcmk_children[i].pid));
|
||||
pcmk_children[i].respawn_count = -1; /* 0~keep watching */
|
||||
pcmk_children[i].active_before_startup = true;
|
||||
- tracking = true;
|
||||
break;
|
||||
case pcmk_rc_ipc_pid_only:
|
||||
if (pcmk_children[i].respawn_count == WAIT_TRIES) {
|
||||
@@ -751,10 +796,8 @@ find_and_track_existing_processes(void)
|
||||
pcmk_children[i].respawn_count = 0; /* restore pristine state */
|
||||
}
|
||||
|
||||
- if (tracking) {
|
||||
- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||
+ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||
check_active_before_startup_processes, NULL);
|
||||
- }
|
||||
return pcmk_rc_ok;
|
||||
}
|
||||
|
||||
--
|
||||
2.27.0
|
||||
|
||||
|
||||
From 4b60aa100669ff494dd3f1303ca9586dc52e95e4 Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Thu, 9 Dec 2021 11:25:22 +0100
|
||||
Subject: [PATCH 2/2] Fix: ipc_client: use libqb async API for connect
|
||||
|
||||
---
|
||||
configure.ac | 3 +++
|
||||
lib/common/ipc_client.c | 22 ++++++++++++++++++++++
|
||||
2 files changed, 25 insertions(+)
|
||||
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index f43fb724c7..c747fe1193 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -1309,6 +1309,9 @@ PKG_CHECK_MODULES(libqb, libqb >= 0.17)
|
||||
CPPFLAGS="$libqb_CFLAGS $CPPFLAGS"
|
||||
LIBS="$libqb_LIBS $LIBS"
|
||||
|
||||
+dnl libqb libqb-2.0.3 + ipc-connect-async-API (2022-01)
|
||||
+AC_CHECK_FUNCS([qb_ipcc_connect_async])
|
||||
+
|
||||
dnl libqb 2.0.2+ (2020-10)
|
||||
AC_CHECK_FUNCS(qb_ipcc_auth_get,
|
||||
AC_DEFINE(HAVE_IPCC_AUTH_GET, 1,
|
||||
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
|
||||
index c5afdf3a3d..417b9ef175 100644
|
||||
--- a/lib/common/ipc_client.c
|
||||
+++ b/lib/common/ipc_client.c
|
||||
@@ -1407,13 +1407,35 @@ pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
|
||||
int32_t qb_rc;
|
||||
pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
|
||||
qb_ipcc_connection_t *c;
|
||||
+#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
|
||||
+ struct pollfd pollfd = { 0, };
|
||||
+ int poll_rc;
|
||||
|
||||
+ c = qb_ipcc_connect_async(name, 0,
|
||||
+ &(pollfd.fd));
|
||||
+#else
|
||||
c = qb_ipcc_connect(name, 0);
|
||||
+#endif
|
||||
if (c == NULL) {
|
||||
crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
|
||||
rc = pcmk_rc_ipc_unresponsive;
|
||||
goto bail;
|
||||
}
|
||||
+#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
|
||||
+ pollfd.events = POLLIN;
|
||||
+ do {
|
||||
+ poll_rc = poll(&pollfd, 1, 2000);
|
||||
+ } while ((poll_rc == -1) && (errno == EINTR));
|
||||
+ if ((poll_rc <= 0) || (qb_ipcc_connect_continue(c) != 0)) {
|
||||
+ crm_info("Could not connect to %s IPC: %s", name,
|
||||
+ (poll_rc == 0)?"timeout":strerror(errno));
|
||||
+ rc = pcmk_rc_ipc_unresponsive;
|
||||
+ if (poll_rc > 0) {
|
||||
+ c = NULL; // qb_ipcc_connect_continue cleaned up for us
|
||||
+ }
|
||||
+ goto bail;
|
||||
+ }
|
||||
+#endif
|
||||
|
||||
qb_rc = qb_ipcc_fd_get(c, &fd);
|
||||
if (qb_rc != 0) {
|
||||
--
|
||||
2.27.0
|
||||
|
1338
SOURCES/022-failure-messages.patch
Normal file
1338
SOURCES/022-failure-messages.patch
Normal file
File diff suppressed because it is too large
Load Diff
82
SOURCES/023-memory-leak.patch
Normal file
82
SOURCES/023-memory-leak.patch
Normal file
@ -0,0 +1,82 @@
|
||||
From 8034a203bbff0aa3b53f2946dc58e409bd7246c9 Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Thu, 20 Jan 2022 15:03:31 -0600
|
||||
Subject: [PATCH] Fix: scheduler: avoid memory leak when displaying clones
|
||||
|
||||
Previously, pe__clone_default() unconditionally created a hash table for
|
||||
stopped instances, but didn't free it in every code path.
|
||||
|
||||
Now, only create the table when we have something to put in it and might
|
||||
actually use it, and ensure it always gets freed.
|
||||
---
|
||||
lib/pengine/clone.c | 18 +++++++++++++-----
|
||||
1 file changed, 13 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
|
||||
index 742e2920b0..920a04c32c 100644
|
||||
--- a/lib/pengine/clone.c
|
||||
+++ b/lib/pengine/clone.c
|
||||
@@ -761,7 +761,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
GList *only_node = va_arg(args, GList *);
|
||||
GList *only_rsc = va_arg(args, GList *);
|
||||
|
||||
- GHashTable *stopped = pcmk__strkey_table(free, free);
|
||||
+ GHashTable *stopped = NULL;
|
||||
|
||||
char *list_text = NULL;
|
||||
size_t list_text_len = 0;
|
||||
@@ -818,7 +818,11 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
} else if (partially_active == FALSE) {
|
||||
// List stopped instances when requested (except orphans)
|
||||
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
|
||||
+ && !pcmk_is_set(show_opts, pcmk_show_clone_detail)
|
||||
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
|
||||
+ if (stopped == NULL) {
|
||||
+ stopped = pcmk__strkey_table(free, free);
|
||||
+ }
|
||||
g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
|
||||
}
|
||||
|
||||
@@ -873,7 +877,6 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
}
|
||||
|
||||
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
|
||||
- g_hash_table_destroy(stopped);
|
||||
PCMK__OUTPUT_LIST_FOOTER(out, rc);
|
||||
return pcmk_rc_ok;
|
||||
}
|
||||
@@ -948,8 +951,10 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
|
||||
|
||||
/* Custom stopped table for non-unique clones */
|
||||
- g_hash_table_destroy(stopped);
|
||||
- stopped = pcmk__strkey_table(free, free);
|
||||
+ if (stopped != NULL) {
|
||||
+ g_hash_table_destroy(stopped);
|
||||
+ stopped = NULL;
|
||||
+ }
|
||||
|
||||
if (list == NULL) {
|
||||
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
|
||||
@@ -972,6 +977,9 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
state = "Stopped (disabled)";
|
||||
}
|
||||
|
||||
+ if (stopped == NULL) {
|
||||
+ stopped = pcmk__strkey_table(free, free);
|
||||
+ }
|
||||
if (probe_op != NULL) {
|
||||
int rc;
|
||||
|
||||
@@ -987,7 +995,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||
g_list_free(list);
|
||||
}
|
||||
|
||||
- if (g_hash_table_size(stopped) > 0) {
|
||||
+ if (stopped != NULL) {
|
||||
GList *list = sorted_hash_table_values(stopped);
|
||||
|
||||
clone_header(out, &rc, rsc, clone_data);
|
||||
--
|
||||
2.27.0
|
||||
|
108
SOURCES/024-daemon-tracking.patch
Normal file
108
SOURCES/024-daemon-tracking.patch
Normal file
@ -0,0 +1,108 @@
|
||||
From ac92690d8426ec4d1c8be1e0eb4b9289411afe75 Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Mon, 24 Jan 2022 12:18:42 +0100
|
||||
Subject: [PATCH] Fix: pacemakerd: have signal-handler take care of lost
|
||||
processes
|
||||
|
||||
Regression from the introduction of periodic subdaemon checking, in
|
||||
cases where subdaemons are pacemakerd children - previously each process
|
||||
was covered by either periodic checking or the signal handler.
|
||||
---
|
||||
daemons/pacemakerd/pcmkd_subdaemons.c | 38 ++++++++++++++++-----------
|
||||
1 file changed, 22 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
index c03903c99e..84ecdc1ee8 100644
|
||||
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||
@@ -141,7 +141,6 @@ check_active_before_startup_processes(gpointer user_data)
|
||||
switch (rc) {
|
||||
case pcmk_rc_ok:
|
||||
pcmk_children[next_child].check_count = 0;
|
||||
- next_child++;
|
||||
subdaemon_check_progress = time(NULL);
|
||||
break;
|
||||
case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||
@@ -178,9 +177,27 @@ check_active_before_startup_processes(gpointer user_data)
|
||||
/* go to the next child and see if
|
||||
we can make progress there
|
||||
*/
|
||||
- next_child++;
|
||||
break;
|
||||
case pcmk_rc_ipc_unresponsive:
|
||||
+ if (!pcmk_children[next_child].respawn) {
|
||||
+ /* if a subdaemon is down and we don't want it
|
||||
+ to be restarted this is a success during
|
||||
+ shutdown. if it isn't restarted anymore
|
||||
+ due to MAX_RESPAWN it is
|
||||
+ rather no success.
|
||||
+ */
|
||||
+ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||
+ subdaemon_check_progress = time(NULL);
|
||||
+ }
|
||||
+ }
|
||||
+ if (!pcmk_children[next_child].active_before_startup) {
|
||||
+ crm_trace("found %s[%lld] missing - signal-handler "
|
||||
+ "will take care of it",
|
||||
+ pcmk_children[next_child].name,
|
||||
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||
+ pcmk_children[next_child].pid));
|
||||
+ break;
|
||||
+ }
|
||||
if (pcmk_children[next_child].respawn) {
|
||||
crm_err("%s[%lld] terminated",
|
||||
pcmk_children[next_child].name,
|
||||
@@ -194,24 +211,13 @@ check_active_before_startup_processes(gpointer user_data)
|
||||
pcmk_children[next_child].pid));
|
||||
}
|
||||
pcmk_process_exit(&(pcmk_children[next_child]));
|
||||
- if (!pcmk_children[next_child].respawn) {
|
||||
- /* if a subdaemon is down and we don't want it
|
||||
- to be restarted this is a success during
|
||||
- shutdown. if it isn't restarted anymore
|
||||
- due to MAX_RESPAWN it is
|
||||
- rather no success.
|
||||
- */
|
||||
- if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||
- subdaemon_check_progress = time(NULL);
|
||||
- }
|
||||
- next_child++;
|
||||
- }
|
||||
break;
|
||||
default:
|
||||
crm_exit(CRM_EX_FATAL);
|
||||
break; /* static analysis/noreturn */
|
||||
}
|
||||
|
||||
+ next_child++;
|
||||
if (next_child >= PCMK__NELEM(pcmk_children)) {
|
||||
next_child = 0;
|
||||
}
|
||||
@@ -285,6 +291,7 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||
{
|
||||
child->pid = 0;
|
||||
child->active_before_startup = false;
|
||||
+ child->check_count = 0;
|
||||
|
||||
child->respawn_count += 1;
|
||||
if (child->respawn_count > MAX_RESPAWN) {
|
||||
@@ -307,8 +314,6 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||
crm_warn("One-off suppressing strict respawning of a child process %s,"
|
||||
" appears alright per %s IPC end-point",
|
||||
child->name, child->endpoint);
|
||||
- /* need to monitor how it evolves, and start new process if badly */
|
||||
- child->active_before_startup = true;
|
||||
|
||||
} else {
|
||||
if (child->needs_cluster && !pcmkd_cluster_connected()) {
|
||||
@@ -422,6 +427,7 @@ start_child(pcmk_child_t * child)
|
||||
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
|
||||
|
||||
child->active_before_startup = false;
|
||||
+ child->check_count = 0;
|
||||
|
||||
if (child->command == NULL) {
|
||||
crm_info("Nothing to do for child \"%s\"", child->name);
|
||||
--
|
||||
2.27.0
|
||||
|
30
SOURCES/025-regression.patch
Normal file
30
SOURCES/025-regression.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From 16928cfc69136bc56b1574bee9966e0d5de73abd Mon Sep 17 00:00:00 2001
|
||||
From: Ken Gaillot <kgaillot@redhat.com>
|
||||
Date: Wed, 26 Jan 2022 09:15:43 -0600
|
||||
Subject: [PATCH] Fix: controller: correctly match "node down" events
|
||||
|
||||
Regression introduced in 2.1.2 by 03ce7376e.
|
||||
|
||||
The symptom that led to this was that removing a remote node connection
|
||||
resource would lead to the remote node getting fenced when the connection stop
|
||||
was not recognized as an expected down event.
|
||||
---
|
||||
daemons/controld/controld_te_events.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
|
||||
index 36fd832ba0..1fd7129922 100644
|
||||
--- a/daemons/controld/controld_te_events.c
|
||||
+++ b/daemons/controld/controld_te_events.c
|
||||
@@ -304,7 +304,7 @@ match_down_event(const char *target)
|
||||
gIter2 = gIter2->next) {
|
||||
|
||||
match = (crm_action_t*)gIter2->data;
|
||||
- if (pcmk_is_set(match->flags, pcmk__graph_action_confirmed)) {
|
||||
+ if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
|
||||
xpath_ret = xpath_search(match->xml, xpath);
|
||||
if (numXpathResults(xpath_ret) < 1) {
|
||||
match = NULL;
|
||||
--
|
||||
2.27.0
|
||||
|
1492
SPECS/pacemaker.spec
Normal file
1492
SPECS/pacemaker.spec
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user