import sbd-1.4.0-15.el8

This commit is contained in:
CentOS Sources 2019-11-05 14:04:48 -05:00 committed by Andrew Lukoshko
parent c30f7b670b
commit 65d1f89931
35 changed files with 959 additions and 1479 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz
SOURCES/sbd-7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26.tar.gz

View File

@ -1 +1 @@
c7d993891714de43c052a87b0165a97a6e27cfb5 SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz
7bf4ad26875cafa743ca96aec9dae1739bd6281b SOURCES/sbd-7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26.tar.gz

View File

@ -0,0 +1,79 @@
From f8f980340256ab5bef5385cd3bc082fdfb7613ed Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Thu, 7 Mar 2019 15:01:26 +0100
Subject: [PATCH] Fix: sbd-cluster: finalize cmap connection if disconnected
from cluster
Previously, if the sbd cluster servant was disconnected from (or reconnected
to) the cluster for any reason, it would start hogging the CPU by continuously
polling the main loop source from the old cmap connection.
---
src/sbd-cluster.c | 38 ++++++++++++++++++++++++++------------
1 file changed, 26 insertions(+), 12 deletions(-)
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index 51bb456..541212f 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -174,6 +174,25 @@ cmap_dispatch_callback (gpointer user_data)
return TRUE;
}
+static void
+cmap_destroy(void)
+{
+ if (cmap_source) {
+ g_source_destroy(cmap_source);
+ cmap_source = NULL;
+ }
+
+ if (track_handle) {
+ cmap_track_delete(cmap_handle, track_handle);
+ track_handle = 0;
+ }
+
+ if (cmap_handle) {
+ cmap_finalize(cmap_handle);
+ cmap_handle = 0;
+ }
+}
+
static gboolean
sbd_get_two_node(void)
{
@@ -217,18 +236,7 @@ sbd_get_two_node(void)
return TRUE;
out:
- if (cmap_source) {
- g_source_destroy(cmap_source);
- cmap_source = NULL;
- }
- if (track_handle) {
- cmap_track_delete(cmap_handle, track_handle);
- track_handle = 0;
- }
- if (cmap_handle) {
- cmap_finalize(cmap_handle);
- cmap_handle = 0;
- }
+ cmap_destroy();
return FALSE;
}
@@ -327,6 +335,12 @@ sbd_membership_destroy(gpointer user_data)
{
cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type()));
+ if (get_cluster_type() != pcmk_cluster_unknown) {
+#if SUPPORT_COROSYNC && CHECK_TWO_NODE
+ cmap_destroy();
+#endif
+ }
+
set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated");
notify_parent();
--
1.8.3.1

View File

@ -1,43 +0,0 @@
From 5f1ac8f07fd81a2c60db39dd5a28debbadfe3ec5 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Thu, 14 Dec 2017 17:32:08 +0100
Subject: [PATCH] Fix: systemd: make pacemaker & dlm wait for sbd-start to
complete
---
src/sbd.service.in | 4 +++-
src/sbd_remote.service.in | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/sbd.service.in b/src/sbd.service.in
index ef1bd16..401c2d6 100644
--- a/src/sbd.service.in
+++ b/src/sbd.service.in
@@ -1,6 +1,7 @@
[Unit]
Description=Shared-storage based fencing daemon
Before=pacemaker.service
+Before=dlm.service
After=systemd-modules-load.service iscsi.service
PartOf=corosync.service
RefuseManualStop=true
@@ -22,4 +23,5 @@ Restart=on-abort
[Install]
RequiredBy=corosync.service
-
+RequiredBy=pacemaker.service
+RequiredBy=dlm.service
diff --git a/src/sbd_remote.service.in b/src/sbd_remote.service.in
index e05f80e..cefd511 100644
--- a/src/sbd_remote.service.in
+++ b/src/sbd_remote.service.in
@@ -21,4 +21,4 @@ Restart=on-abort
[Install]
RequiredBy=pacemaker_remote.service
-
+RequiredBy=dlm.service
--
1.8.3.1

View File

@ -1,27 +0,0 @@
From f79d09ec8dd744f69d87008e868297b308043b56 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Thu, 14 Dec 2017 18:21:07 +0100
Subject: [PATCH] Doc: sbd.sysconfig: mention timeout caveat with
SBD_DELAY_START
---
src/sbd.sysconfig | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index cbc1501..75ff980 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -31,6 +31,9 @@ SBD_STARTMODE=always
# other nodes are still waiting in the fence acknowledgement phase.
# This is an occasional issue with virtual machines.
#
+# Consider that you might have to adapt the startup-timeout accordingly
+# if the default isn't sufficient. (TimeoutStartSec for systemd)
+#
# This option may be ignored at a later point, once pacemaker handles
# this case better.
#
--
1.8.3.1

View File

@ -1,48 +0,0 @@
From e073271f53583f2d0cf2675ea665ed50712b65dd Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 8 Jan 2018 12:07:33 +0100
Subject: [PATCH] Doc: sbd.8.pod: add sections for query-watchdog &
test-watchdog
---
man/sbd.8.pod | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/man/sbd.8.pod b/man/sbd.8.pod
index c1bf67a..ffd01c2 100644
--- a/man/sbd.8.pod
+++ b/man/sbd.8.pod
@@ -394,6 +394,30 @@ a fencing message by the cluster.
=back
+=head2 query-watchdog
+
+Example usage:
+
+ sbd query-watchdog
+
+Check for available watchdog devices and print some info.
+
+B<Warning>: This command will arm the watchdog during query, and if your
+watchdog refuses disarming (for example, if its kernel module has the
+'nowayout' parameter set) this will reset your system.
+
+=head2 test-watchdog
+
+Example usage:
+
+ sbd test-watchdog [-w /dev/watchdog3]
+
+Test specified watchdog device (/dev/watchdog by default).
+
+B<Warning>: This command will arm the watchdog and have your system reset
+in case your watchdog is working properly! If issued from an interactive
+session, it will prompt for confirmation.
+
=head1 Base system configuration
=head2 Configure a watchdog
--
1.8.3.1

View File

@ -1,86 +0,0 @@
From ef40f6a0fdc178828fbde6f1303e5ee58bfb822a Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Wed, 7 Mar 2018 17:50:29 +0100
Subject: [PATCH] Build: sbd-pacemaker: Query CIB directly with the API instead
of get_cib_copy()
get_cib_copy() has been dropped from pacemaker 2.0 branch as of:
https://github.com/ClusterLabs/pacemaker/commit/32c75b7be
---
src/sbd-pacemaker.c | 39 +++++++++++++++++++++++++++++++++++----
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c
index b6a8fb6..2f06109 100644
--- a/src/sbd-pacemaker.c
+++ b/src/sbd-pacemaker.c
@@ -109,6 +109,38 @@ mon_cib_connection_destroy(gpointer user_data)
return;
}
+static void
+mon_retrieve_current_cib()
+{
+ xmlNode *xml_cib = NULL;
+ int options = cib_scope_local | cib_sync_call;
+ int rc = pcmk_ok;
+
+ free_xml(current_cib);
+ current_cib = NULL;
+
+ rc = cib->cmds->query(cib, NULL, &xml_cib, options);
+
+ if (rc != pcmk_ok) {
+ crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
+ free_xml(xml_cib);
+ return;
+
+ } else if (xml_cib == NULL) {
+ crm_err("Couldn't retrieve the CIB: empty result");
+ return;
+ }
+
+ if (safe_str_eq(crm_element_name(xml_cib), XML_TAG_CIB)) {
+ current_cib = xml_cib;
+
+ } else {
+ free_xml(xml_cib);
+ }
+
+ return;
+}
+
static gboolean
mon_timer_notify(gpointer data)
{
@@ -121,8 +153,7 @@ mon_timer_notify(gpointer data)
if (cib_connected) {
if (counter == counter_max) {
- free_xml(current_cib);
- current_cib = get_cib_copy(cib);
+ mon_retrieve_current_cib();
mon_refresh_state(NULL);
counter = 0;
} else {
@@ -163,7 +194,7 @@ cib_connect(gboolean full)
return rc;
}
- current_cib = get_cib_copy(cib);
+ mon_retrieve_current_cib();
mon_refresh_state(NULL);
if (full) {
@@ -308,7 +339,7 @@ crm_diff_update(const char *event, xmlNode * msg)
}
if (current_cib == NULL) {
- current_cib = get_cib_copy(cib);
+ mon_retrieve_current_cib();
}
/* Refresh
--
1.8.3.1

View File

@ -0,0 +1,26 @@
From 6d4289655dacad4b72fb64373c37bd1ad33649e6 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Tue, 22 Jan 2019 16:33:04 +0100
Subject: [PATCH] Doc: sbd.8.pod: use the generic term "cluster services"
instead of the specific "openais"
---
man/sbd.8.pod | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/man/sbd.8.pod b/man/sbd.8.pod
index ffd01c2..fed6bd3 100644
--- a/man/sbd.8.pod
+++ b/man/sbd.8.pod
@@ -476,7 +476,7 @@ storage (with internal redundancy) anyway; the SBD device does not
introduce an additional single point of failure then.
If the SBD device is not accessible, the daemon will fail to start and
-inhibit openais startup.
+inhibit startup of cluster services.
=item Two devices
--
1.8.3.1

View File

@ -0,0 +1,27 @@
From 091e10ae3f62239251b53bf7d81d47a57a9b82f2 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Wed, 23 Jan 2019 17:21:15 +0100
Subject: [PATCH] Doc: sbd.sysconfig: watchdog timeout set in the on-disk
metadata takes precedence
---
src/sbd.sysconfig | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index e661f96..f163f21 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -68,6 +68,9 @@ SBD_WATCHDOG_DEV=/dev/watchdog
# If your sbd device(s) reside on a multipath setup or iSCSI, this
# should be the time required to detect a path failure.
#
+# Be aware that watchdog timeout set in the on-disk metadata takes
+# precedence.
+#
SBD_WATCHDOG_TIMEOUT=5
## Type: string
--
1.8.3.1

View File

@ -1,33 +0,0 @@
From ba3b4127f658cb59ff09939e8de93a06a138dddb Mon Sep 17 00:00:00 2001
From: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Date: Sat, 25 Nov 2017 09:18:41 +0100
Subject: [PATCH] Fix: build error with glibc 2.25
Add include for makedev, major and minor
sbd-common.c:268:13: error: In the GNU C Library, "makedev" is defined
by <sys/sysmacros.h>. For historical compatibility, it is
currently defined by <sys/types.h> as well, but we plan to
remove this soon. To use "makedev", include <sys/sysmacros.h>
directly. If you did not intend to use a system-defined macro
"makedev", you should undefine it after including <sys/types.h>. [-Werror]
{makedev(10,130), 0};
---
src/sbd-common.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 1d7dbc2..25aaeae 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -19,6 +19,7 @@
#include "sbd.h"
#include <sys/reboot.h>
#include <sys/types.h>
+#include <sys/sysmacros.h>
#include <sys/stat.h>
#include <pwd.h>
#include <unistd.h>
--
1.8.3.1

View File

@ -1,25 +0,0 @@
From ee232b251c7072935d0507dc0bad27f375a12492 Mon Sep 17 00:00:00 2001
From: Valentin Vidic <Valentin.Vidic@CARNet.hr>
Date: Wed, 8 Nov 2017 22:02:29 +0100
Subject: [PATCH] Fix: gcc format string error
---
src/sbd-common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 803bc3a..1d7dbc2 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -268,7 +268,7 @@ watchdog_populate_list(void)
{makedev(10,130), 0};
int num_watchdogs = 1;
struct dirent *entry;
- char entry_name[64];
+ char entry_name[280];
DIR *dp;
char buf[256] = "";
--
1.8.3.1

View File

@ -0,0 +1,142 @@
From 8301cbafed191f30656a22876941cc7c9189b623 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Thu, 31 Jan 2019 14:42:01 +0100
Subject: [PATCH] Refactor: fail earlier on invalid servants
---
src/sbd-inquisitor.c | 51 ++++++++++++++++++++++++++++++++-------------------
src/sbd-md.c | 7 +------
src/sbd.h | 2 +-
3 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 8e0bc87..9be6c99 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -42,19 +42,36 @@ void recruit_servant(const char *devname, pid_t pid)
struct servants_list_item *newbie;
if (lookup_servant_by_dev(devname)) {
- cl_log(LOG_DEBUG, "Servant %s already exists", devname);
- return;
+ cl_log(LOG_DEBUG, "Servant %s already exists", devname);
+ return;
}
newbie = malloc(sizeof(*newbie));
- if (!newbie) {
- fprintf(stderr, "malloc failed in recruit_servant.\n");
- exit(1);
+ if (newbie) {
+ memset(newbie, 0, sizeof(*newbie));
+ newbie->devname = strdup(devname);
+ newbie->pid = pid;
+ newbie->first_start = 1;
+ }
+ if (!newbie || !newbie->devname) {
+ fprintf(stderr, "heap allocation failed in recruit_servant.\n");
+ exit(1);
+ }
+
+ /* some sanity-check on our newbie */
+ if (sbd_is_disk(newbie)) {
+ cl_log(LOG_INFO, "Monitoring %s", devname);
+ disk_count++;
+ } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) {
+ /* alive just after pcmk and cluster servants have shown up */
+ newbie->outdated = 1;
+ } else {
+ /* toss our newbie */
+ cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname);
+ free((void *) newbie->devname);
+ free(newbie);
+ return;
}
- memset(newbie, 0, sizeof(*newbie));
- newbie->devname = strdup(devname);
- newbie->pid = pid;
- newbie->first_start = 1;
if (!s) {
servants_leader = newbie;
@@ -65,12 +82,6 @@ void recruit_servant(const char *devname, pid_t pid)
}
servant_count++;
- if(sbd_is_disk(newbie)) {
- cl_log(LOG_INFO, "Monitoring %s", devname);
- disk_count++;
- } else {
- newbie->outdated = 1;
- }
}
int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp)
@@ -148,7 +159,7 @@ void servant_start(struct servants_list_item *s)
if (sbd_is_disk(s)) {
#if SUPPORT_SHARED_DISK
DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname);
- s->pid = assign_servant(s->devname, servant, start_mode, s);
+ s->pid = assign_servant(s->devname, servant_md, start_mode, s);
#else
cl_log(LOG_ERR, "Shared disk functionality not supported");
return;
@@ -785,12 +796,14 @@ parse_device_line(const char *line)
if (lpc > last) {
entry = calloc(1, 1 + lpc - last);
+ if (!entry) {
+ fprintf(stderr, "heap allocation failed parsing device-line.\n");
+ exit(1);
+ }
rc = sscanf(line + last, "%[^;]", entry);
}
- if (entry == NULL) {
- /* Skip */
- } else if (rc != 1) {
+ if (rc != 1) {
cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last);
} else {
cl_log(LOG_DEBUG, "Adding '%s'", entry);
diff --git a/src/sbd-md.c b/src/sbd-md.c
index 579d273..ba2c34d 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1031,7 +1031,7 @@ static int servant_check_timeout_inconsistent(struct sector_header_s *hdr)
return 0;
}
-int servant(const char *diskname, int mode, const void* argp)
+int servant_md(const char *diskname, int mode, const void* argp)
{
struct sector_mbox_s *s_mbox = NULL;
struct sector_node_s *s_node = NULL;
@@ -1046,11 +1046,6 @@ int servant(const char *diskname, int mode, const void* argp)
char uuid[37];
const struct servants_list_item *s = argp;
- if (!diskname) {
- cl_log(LOG_ERR, "Empty disk name %s.", diskname);
- return -1;
- }
-
cl_log(LOG_INFO, "Servant starting for device %s", diskname);
/* Block most of the signals */
diff --git a/src/sbd.h b/src/sbd.h
index 386c85c..6fe07f9 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -175,7 +175,7 @@ int ping_via_slots(const char *name, struct servants_list_item *servants);
int dump_headers(struct servants_list_item *servants);
unsigned long get_first_msgwait(struct servants_list_item *servants);
int messenger(const char *name, const char *msg, struct servants_list_item *servants);
-int servant(const char *diskname, int mode, const void* argp);
+int servant_md(const char *diskname, int mode, const void* argp);
#endif
int servant_pcmk(const char *diskname, int mode, const void* argp);
--
1.8.3.1

View File

@ -1,51 +0,0 @@
From 04d32266b378f5f47088e8f34703bdd9c95f5a4c Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Thu, 30 Nov 2017 16:11:00 +0100
Subject: [PATCH] Build: cluster-servant: Compile with pacemaker-2.0
Pacemaker-2.0 removed support for corosync 1 cluster layer:
https://github.com/ClusterLabs/pacemaker/commit/7a9891f29
---
configure.ac | 4 ++++
src/sbd-cluster.c | 4 ++++
2 files changed, 8 insertions(+)
diff --git a/configure.ac b/configure.ac
index 1eb8758..1f328c2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -72,6 +72,10 @@ AC_CHECK_HEADERS(pacemaker/crm/cluster.h)
AC_CHECK_LIB(crmcommon, pcmk_strerror, , missing="yes")
AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes")
+dnl pacemaker-2.0 removed support for corosync 1 cluster layer
+AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,,
+ [#include <pacemaker/crm/cluster.h>])
+
if test "$missing" = "yes"; then
AC_MSG_ERROR([Missing required libraries or functions.])
fi
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index de99d0c..ae4750e 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -238,12 +238,16 @@ notify_timer_cb(gpointer data)
}
switch (get_cluster_type()) {
+#if HAVE_DECL_PCMK_CLUSTER_CLASSIC_AIS
case pcmk_cluster_classic_ais:
send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais);
break;
+#endif
case pcmk_cluster_corosync:
+#if HAVE_DECL_PCMK_CLUSTER_CMAN
case pcmk_cluster_cman:
+#endif
/* TODO - Make a CPG call and only call notify_parent() when we get a reply */
notify_parent();
break;
--
1.8.3.1

View File

@ -0,0 +1,112 @@
From d3be2caffb9edbb6bfe0e2658c66a1826f4e9c3a Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 15 Apr 2019 17:41:51 +0200
Subject: [PATCH] Fix: sbd-inquisitor: overhaul device-list-parser
for readability and robustness
---
src/sbd-inquisitor.c | 60 ++++++++++++++++++++++++++--------------------------
1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 715e978..b4b5585 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -780,56 +780,56 @@ int inquisitor(void)
int
parse_device_line(const char *line)
{
- int lpc = 0;
- int last = 0;
- int max = 0;
+ size_t lpc = 0;
+ size_t last = 0;
+ size_t max = 0;
int found = 0;
+ bool skip_space = true;
+ int space_run = 0;
- if(line) {
- max = strlen(line);
+ if (!line) {
+ return 0;
}
- if (max <= 0) {
- return found;
- }
+ max = strlen(line);
- cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", max, line);
- /* Skip initial whitespace */
- for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) {
- last = lpc + 1;
- }
+ cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line);
- /* Now the actual content */
for (lpc = 0; lpc <= max; lpc++) {
- int a_space = isspace(line[lpc]);
-
- if (a_space && lpc < max && isspace(line[lpc + 1])) {
- /* fast-forward to the end of the spaces */
-
- } else if (a_space || line[lpc] == ';' || line[lpc] == 0) {
- int rc = 1;
- char *entry = NULL;
+ if (isspace(line[lpc])) {
+ if (skip_space) {
+ last = lpc + 1;
+ } else {
+ space_run++;
+ }
+ continue;
+ }
+ skip_space = false;
+ if (line[lpc] == ';' || line[lpc] == 0) {
+ int rc = 0;
+ char *entry = calloc(1, 1 + lpc - last);
- if (lpc > last) {
- entry = calloc(1, 1 + lpc - last);
- if (!entry) {
- fprintf(stderr, "heap allocation failed parsing device-line.\n");
- exit(1);
- }
+ if (entry) {
rc = sscanf(line + last, "%[^;]", entry);
+ } else {
+ fprintf(stderr, "Heap allocation failed parsing device-line.\n");
+ exit(1);
}
if (rc != 1) {
- cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last);
+ cl_log(LOG_WARNING, "Could not parse: '%s'", line + last);
} else {
+ entry[strlen(entry)-space_run] = '\0';
cl_log(LOG_DEBUG, "Adding '%s'", entry);
recruit_servant(entry, 0);
found++;
}
free(entry);
+ skip_space = true;
last = lpc + 1;
}
+ space_run = 0;
}
return found;
}
@@ -890,7 +890,7 @@ int main(int argc, char **argv, char **envp)
int devices = parse_device_line(value);
if(devices < 1) {
fprintf(stderr, "Invalid device line: %s\n", value);
- exit_status = -2;
+ exit_status = -2;
goto out;
}
#else
--
1.8.3.1

View File

@ -1,78 +0,0 @@
From 1d8fd2540ccf254d90e831f612415226043fc5b3 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Fri, 27 Apr 2018 13:41:00 +0200
Subject: [PATCH] Log: change sbd's default logging level to LOG_NOTICE
After the refactoring of the logging parts and 1ee3503c, sbd became too
quiet at the default logging level LOG_WARNING, even in situations
where it is supposed to report something.
This commit changes sbd's default logging level to LOG_NOTICE.
Meanwhile pacemaker library's logging level remains at LOG_WARNING.
With "-v", sbd's logging level is set to LOG_INFO.
With "-vv", sbd's logging level is set to LOG_DEBUG.
With "-vvv", both sbd's and pacemaker library's logging levels are set
to LOG_DEBUG.
---
src/sbd-inquisitor.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 59408b3..237bf43 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -803,6 +803,19 @@ parse_device_line(const char *line)
return found;
}
+#define SBD_SOURCE_FILES "sbd-cluster.c,sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c,setproctitle.c"
+
+static void
+sbd_log_filter_ctl(const char *files, uint8_t priority)
+{
+ if (files == NULL) {
+ files = SBD_SOURCE_FILES;
+ }
+
+ qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority);
+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority);
+}
+
int
arg_enabled(int arg_count)
{
@@ -834,6 +847,7 @@ int main(int argc, char **argv, char **envp)
qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE);
+ sbd_log_filter_ctl(NULL, LOG_NOTICE);
sbd_get_uname();
@@ -926,15 +940,17 @@ int main(int argc, char **argv, char **envp)
case 'v':
debug++;
if(debug == 1) {
- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG);
- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG);
- cl_log(LOG_INFO, "Verbose mode enabled.");
+ sbd_log_filter_ctl(NULL, LOG_INFO);
+ cl_log(LOG_INFO, "Verbose mode enabled.");
} else if(debug == 2) {
+ sbd_log_filter_ctl(NULL, LOG_DEBUG);
+ cl_log(LOG_INFO, "Debug mode enabled.");
+
+ } else if(debug == 3) {
/* Go nuts, turn on pacemaker's logging too */
- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG);
- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG);
- cl_log(LOG_INFO, "Verbose library mode enabled.");
+ sbd_log_filter_ctl("*", LOG_DEBUG);
+ cl_log(LOG_INFO, "Debug library mode enabled.");
}
break;
case 'T':
--
1.8.3.1

View File

@ -0,0 +1,47 @@
From 8e94781169fc2f36eb49078de1978ceb53df6b6c Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 15 Apr 2019 17:40:26 +0200
Subject: [PATCH] Refactor: sbd-common: no reason for stack-hogger having
retval
---
src/sbd-common.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 3966f25..873a76e 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -568,13 +568,13 @@ enum {
#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK)
#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data)
-static unsigned char
+static void
sbd_stack_hogger(unsigned char * inbuf, int kbytes)
{
unsigned char buf[1024];
if(kbytes <= 0) {
- return HOG_CHAR;
+ return;
}
if (inbuf == NULL) {
@@ -584,10 +584,10 @@ sbd_stack_hogger(unsigned char * inbuf, int kbytes)
}
if (kbytes > 0) {
- return sbd_stack_hogger(buf, kbytes-1);
- } else {
- return buf[sizeof(buf)-1];
+ sbd_stack_hogger(buf, kbytes-1);
}
+
+ return;
}
static void
--
1.8.3.1

View File

@ -1,161 +0,0 @@
From 2dbdee29736fcbf0fe1d41c306959b22d05f72b0 Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Mon, 30 Apr 2018 18:02:04 +0200
Subject: [PATCH] Log: upgrade important messages and downgrade unimportant
ones
It also fixes a message that's supposed to be "quorum.two_node not
present in cmap".
---
src/sbd-cluster.c | 11 ++++++-----
src/sbd-common.c | 4 ++--
src/sbd-inquisitor.c | 6 +++---
src/sbd-md.c | 6 +++---
src/sbd-pacemaker.c | 2 +-
5 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index ae4750e..c7328af 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -201,10 +201,11 @@ sbd_get_two_node(void)
}
if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) {
- cl_log(LOG_NOTICE, "Corosync is%s in 2Node-mode", two_node_u8?"":" not");
+ cl_log(two_node_u8? LOG_NOTICE : LOG_INFO,
+ "Corosync is%s in 2Node-mode", two_node_u8?"":" not");
two_node = two_node_u8;
} else {
- cl_log(LOG_NOTICE, "quorum.two_node present in cmap\n");
+ cl_log(LOG_INFO, "quorum.two_node not present in cmap\n");
}
return TRUE;
@@ -264,7 +265,7 @@ sbd_membership_connect(void)
{
bool connected = false;
- cl_log(LOG_NOTICE, "Attempting cluster connection");
+ cl_log(LOG_INFO, "Attempting cluster connection");
cluster.destroy = sbd_membership_destroy;
@@ -308,7 +309,7 @@ sbd_membership_connect(void)
}
}
- set_servant_health(pcmk_health_transient, LOG_NOTICE, "Connected, waiting for initial membership");
+ set_servant_health(pcmk_health_transient, LOG_INFO, "Connected, waiting for initial membership");
notify_parent();
notify_timer_cb(NULL);
@@ -530,7 +531,7 @@ servant_cluster(const char *diskname, int mode, const void* argp)
enum cluster_type_e cluster_stack = get_cluster_type();
crm_system_name = strdup("sbd:cluster");
- cl_log(LOG_INFO, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack));
+ cl_log(LOG_NOTICE, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack));
set_proc_title("sbd: watcher: Cluster");
sbd_membership_connect();
diff --git a/src/sbd-common.c b/src/sbd-common.c
index f22c4f2..0ce6478 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -893,7 +893,7 @@ notify_parent(void)
case pcmk_health_pending:
case pcmk_health_shutdown:
case pcmk_health_transient:
- DBGLOG(LOG_INFO, "Not notifying parent: state transient (%d)", servant_health);
+ DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health);
break;
case pcmk_health_unknown:
@@ -904,7 +904,7 @@ notify_parent(void)
break;
case pcmk_health_online:
- DBGLOG(LOG_INFO, "Notifying parent: healthy");
+ DBGLOG(LOG_DEBUG, "Notifying parent: healthy");
sigqueue(ppid, SIG_LIVENESS, signal_value);
break;
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 237bf43..90c7d26 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -64,7 +64,7 @@ void recruit_servant(const char *devname, pid_t pid)
servant_count++;
if(sbd_is_disk(newbie)) {
- cl_log(LOG_NOTICE, "Monitoring %s", devname);
+ cl_log(LOG_INFO, "Monitoring %s", devname);
disk_count++;
} else {
newbie->outdated = 1;
@@ -565,7 +565,7 @@ void inquisitor_child(void)
if(cluster_alive(true)) {
/* We LIVE! */
if(cluster_appeared == false) {
- cl_log(LOG_NOTICE, "Active cluster detected");
+ cl_log(LOG_INFO, "Active cluster detected");
}
tickle = 1;
can_detach = 1;
@@ -574,7 +574,7 @@ void inquisitor_child(void)
} else if(cluster_alive(false)) {
if(!decoupled) {
/* On the way up, detach and arm the watchdog */
- cl_log(LOG_NOTICE, "Partial cluster detected, detaching");
+ cl_log(LOG_INFO, "Partial cluster detected, detaching");
}
can_detach = 1;
diff --git a/src/sbd-md.c b/src/sbd-md.c
index 6a964dd..6f152c4 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1097,7 +1097,7 @@ int servant(const char *diskname, int mode, const void* argp)
exit(EXIT_MD_IO_FAIL);
}
- DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname);
+ cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname);
if (s_header->minor_version == 0) {
set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox);
} else {
@@ -1180,7 +1180,7 @@ int servant(const char *diskname, int mode, const void* argp)
}
if (s_mbox->cmd > 0) {
- cl_log(LOG_INFO,
+ cl_log(LOG_NOTICE,
"Received command %s from %s on disk %s",
char2cmd(s_mbox->cmd), s_mbox->from, diskname);
@@ -1222,7 +1222,7 @@ int servant(const char *diskname, int mode, const void* argp)
(int)latency, (int)timeout_watchdog_warn,
diskname);
} else if (debug) {
- DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency,
+ DBGLOG(LOG_DEBUG, "Latency: %d on disk %s", (int)latency,
diskname);
}
}
diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c
index 2f06109..a435d01 100644
--- a/src/sbd-pacemaker.c
+++ b/src/sbd-pacemaker.c
@@ -416,7 +416,7 @@ servant_pcmk(const char *diskname, int mode, const void* argp)
int exit_code = 0;
crm_system_name = strdup("sbd:pcmk");
- cl_log(LOG_INFO, "Monitoring Pacemaker health");
+ cl_log(LOG_NOTICE, "Monitoring Pacemaker health");
set_proc_title("sbd: watcher: Pacemaker");
setenv("PCMK_watchdog", "true", 1);
--
1.8.3.1

View File

@ -0,0 +1,36 @@
From 5c80753afb4abc2b5b024f4a5f2fc78669bda70b Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 15 Apr 2019 17:39:12 +0200
Subject: [PATCH] Sanity: sbd-inquisitor: free timeout action on bail out
---
src/sbd-inquisitor.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 77c6e4f..715e978 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -1088,7 +1088,8 @@ int main(int argc, char **argv, char **envp)
break;
case 'h':
usage();
- return (0);
+ goto out;
+ break;
default:
exit_status = -2;
goto out;
@@ -1241,6 +1242,9 @@ int main(int argc, char **argv, char **envp)
}
out:
+ if (timeout_action) {
+ free(timeout_action);
+ }
if (exit_status < 0) {
if (exit_status == -2) {
usage();
--
1.8.3.1

View File

@ -1,54 +0,0 @@
From 13295dec0f567d6795522241fff6817a68b02033 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 2 May 2018 20:07:12 +0200
Subject: [PATCH] Refactor: sbd-cluster: let scan do the job of proc-parsing
It now also parses names containing spaces properly.
---
src/sbd-cluster.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index 8512f23..f5e9ff0 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -338,7 +338,7 @@ sbd_membership_destroy(gpointer user_data)
* \brief Get process ID and name associated with a /proc directory entry
*
* \param[in] entry Directory entry (must be result of readdir() on /proc)
- * \param[out] name If not NULL, a char[64] to hold the process name
+ * \param[out] name If not NULL, a char[16] to hold the process name
* \param[out] pid If not NULL, will be set to process ID of entry
*
* \return 0 on success, -1 if entry is not for a process or info not found
@@ -353,7 +353,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid)
int fd, local_pid;
FILE *file;
struct stat statbuf;
- char key[16] = { 0 }, procpath[128] = { 0 };
+ char procpath[128] = { 0 };
/* We're only interested in entries whose name is a PID,
* so skip anything non-numeric or that is too long.
@@ -396,8 +396,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid)
if (!file) {
return -1;
}
- if ((fscanf(file, "%15s%63s", key, name) != 2)
- || safe_str_neq(key, "Name:")) {
+ if (fscanf(file, "Name:\t%15[a-zA-Z0-9 _-]", name) != 1) {
fclose(file);
return -1;
}
@@ -484,7 +483,7 @@ static long unsigned int
find_pacemaker_remote(void)
{
DIR *dp;
- char entry_name[64];
+ char entry_name[16];
struct dirent *entry;
dp = opendir("/proc");
--
1.8.3.1

View File

@ -0,0 +1,29 @@
From f6af36a0fb05b5a37b3dfb153677e28ca5cb3fd8 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 15 Apr 2019 17:37:42 +0200
Subject: [PATCH] Sanity: sbd-md: prevent unrealistic overflow on sector io
calc
---
src/sbd-md.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/sbd-md.c b/src/sbd-md.c
index 60a1873..f437c41 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -162,9 +162,9 @@ sector_io(struct sbd_context *st, int sector, void *data, int rw)
memset(&st->io, 0, sizeof(struct iocb));
if (rw) {
- io_prep_pwrite(&st->io, st->devfd, data, sector_size, sector_size * sector);
+ io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
} else {
- io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector);
+ io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector);
}
if (io_submit(st->ioctx, 1, ios) != 1) {
--
1.8.3.1

View File

@ -1,61 +0,0 @@
From a6acd38756fc7f93afcf5c08b8cdf139a3e354e7 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 2 May 2018 13:30:42 +0200
Subject: [PATCH] Fix: sbd-cluster: search for pacemaker-remoted with
pcmk-2.0.0rc3
---
src/sbd-cluster.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index ae4750e..8512f23 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -42,6 +42,13 @@
//undef SUPPORT_PLUGIN
//define SUPPORT_PLUGIN 1
+/* binary for pacemaker-remote has changed with pacemaker 2 */
+#ifdef CRM_SCORE_INFINITY
+#define PACEMAKER_REMOTE_BINARY "pacemaker-remoted"
+#else
+#define PACEMAKER_REMOTE_BINARY "pacemaker_remoted"
+#endif
+
static bool remote_node = false;
static pid_t remoted_pid = 0;
static int reconnect_msec = 1000;
@@ -435,7 +442,7 @@ sbd_remote_check(gpointer user_data)
} else {
int rc = 0;
- char proc_path[PATH_MAX], exe_path[PATH_MAX], expected_path[PATH_MAX];
+ char proc_path[PATH_MAX], exe_path[PATH_MAX];
/* check to make sure pid hasn't been reused by another process */
snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)remoted_pid);
@@ -447,10 +454,7 @@ sbd_remote_check(gpointer user_data)
}
exe_path[rc] = 0;
- rc = snprintf(expected_path, sizeof(proc_path), "%s/pacemaker_remoted", SBINDIR);
- expected_path[rc] = 0;
-
- if (strcmp(exe_path, expected_path) == 0) {
+ if (strcmp(exe_path, SBINDIR "/" PACEMAKER_REMOTE_BINARY) == 0) {
cl_log(LOG_DEBUG, "Process %s (%ld) is active",
exe_path, (long)remoted_pid);
running = 1;
@@ -499,7 +503,7 @@ find_pacemaker_remote(void)
/* entry_name is truncated to 16 characters including the nul terminator */
cl_log(LOG_DEBUG, "Found %s at %u", entry_name, pid);
- if (strcmp(entry_name, "pacemaker_remot") == 0) {
+ if (strncmp(entry_name, PACEMAKER_REMOTE_BINARY, 15) == 0) {
cl_log(LOG_NOTICE, "Found Pacemaker Remote at PID %u", pid);
remoted_pid = pid;
remote_node = true;
--
1.8.3.1

View File

@ -0,0 +1,35 @@
From a80fe9392fd910074eccc4733ff2cd3e1625e48e Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 15 Apr 2019 17:36:12 +0200
Subject: [PATCH] Sanity: sbd-md: remove some left over code
---
src/sbd-md.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/src/sbd-md.c b/src/sbd-md.c
index c51d381..60a1873 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -373,7 +373,6 @@ init_device(struct sbd_context *st)
struct sector_header_s *s_header;
struct sector_node_s *s_node;
struct sector_mbox_s *s_mbox;
- struct stat s;
char uuid[37];
int i;
int rc = 0;
@@ -394,10 +393,6 @@ init_device(struct sbd_context *st)
uuid_generate(s_header->uuid);
uuid_unparse_lower(s_header->uuid, uuid);
- fstat(st->devfd, &s);
- /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n",
- s.st_size, s.st_blksize, s.st_blocks); */
-
cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)",
s_header->version, s_header->minor_version,
st->devfd, uuid);
--
1.8.3.1

View File

@ -1,96 +0,0 @@
From 5d52fa8c3c903df4be0e4e954fbca9b3b15285c6 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Fri, 14 Sep 2018 17:51:50 +0200
Subject: [PATCH] Fix: sbd-common: don't follow symlinks outside /dev for
watchdog
This makes it easier to define a SELinux-policy that keeps
avc-log clean on /dev traversal triggered by query-watchdog.
---
src/sbd-common.c | 42 ++++++++++++++++++++++++++++++++++++++----
1 file changed, 38 insertions(+), 4 deletions(-)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 0ce6478..fcb7a31 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -251,7 +251,8 @@ watchdog_close(bool disarm)
#define MAX_WATCHDOGS 64
#define SYS_CLASS_WATCHDOG "/sys/class/watchdog"
#define SYS_CHAR_DEV_DIR "/sys/dev/char"
-#define WATCHDOG_NODEDIR "/dev"
+#define WATCHDOG_NODEDIR "/dev/"
+#define WATCHDOG_NODEDIR_LEN 5
struct watchdog_list_item {
dev_t dev;
@@ -273,7 +274,7 @@ watchdog_populate_list(void)
struct dirent *entry;
char entry_name[280];
DIR *dp;
- char buf[256] = "";
+ char buf[280] = "";
if (watchdog_list != NULL) {
return;
@@ -313,7 +314,38 @@ watchdog_populate_list(void)
struct stat statbuf;
snprintf(entry_name, sizeof(entry_name),
- WATCHDOG_NODEDIR "/%s", entry->d_name);
+ WATCHDOG_NODEDIR "%s", entry->d_name);
+ if (entry->d_type == DT_LNK) {
+ int len;
+
+ /* !realpath(entry_name, buf) unfortunately does a stat on
+ * target so we can't really use it to check if links stay
+ * within /dev without triggering e.g. AVC-logs (with
+ * SELinux policy that just allows stat within /dev).
+ * Without canonicalization that doesn't actually touch the
+ * filesystem easily available introduce some limitations
+ * for simplicity:
+ * - just simple path without '..'
+ * - just one level of symlinks (avoid e.g. loop-checking)
+ */
+ len = readlink(entry_name, buf, sizeof(buf) - 1);
+ if ((len < 1) ||
+ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) {
+ continue;
+ }
+ buf[len] = '\0';
+ if (buf[0] != '/') {
+ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1);
+ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN);
+ len += WATCHDOG_NODEDIR_LEN;
+ }
+ if (strstr(buf, "/../") ||
+ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) ||
+ lstat(buf, &statbuf) ||
+ !S_ISCHR(statbuf.st_mode)) {
+ continue;
+ }
+ }
if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) {
int i;
@@ -322,6 +354,7 @@ watchdog_populate_list(void)
int wdfd = watchdog_init_fd(entry_name, -1);
struct watchdog_list_item *wdg =
calloc(1, sizeof(struct watchdog_list_item));
+ int len;
wdg->dev = watchdogs[i];
wdg->dev_node = strdup(entry_name);
@@ -343,7 +376,8 @@ watchdog_populate_list(void)
snprintf(entry_name, sizeof(entry_name),
SYS_CHAR_DEV_DIR "/%d:%d/device/driver",
major(watchdogs[i]), minor(watchdogs[i]));
- if (readlink(entry_name, buf, sizeof(buf)) > 0) {
+ if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) {
+ buf[len] = '\0';
wdg->dev_driver = strdup(basename(buf));
} else if ((wdg->dev_ident) &&
(strcmp(wdg->dev_ident,
--
1.8.3.1

View File

@ -0,0 +1,215 @@
From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Fri, 31 May 2019 16:11:16 +0200
Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to
root-slice
---
src/sbd-common.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/sbd-inquisitor.c | 15 +++++++
src/sbd.h | 2 +
src/sbd.sysconfig | 14 +++++++
4 files changed, 141 insertions(+)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 873a76e..ebfdaa3 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -662,6 +662,112 @@ static void sbd_memlock(int stackgrowK, int heapgrowK)
#endif
}
+static int get_realtime_budget(void)
+{
+ FILE *f;
+ char fname[PATH_MAX];
+ int res = -1, lnum = 0;
+ char *cgroup = NULL, *namespecs = NULL;
+
+ snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
+ f = fopen(fname, "rt");
+ if (f == NULL) {
+ cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
+ (intmax_t)getpid());
+ goto exit_res;
+ }
+ while( fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum, &namespecs, &cgroup) !=EOF ) {
+ if (namespecs && strstr(namespecs, "cpuacct")) {
+ free(namespecs);
+ break;
+ }
+ if (cgroup) {
+ free(cgroup);
+ cgroup = NULL;
+ }
+ if (namespecs) {
+ free(namespecs);
+ namespecs = NULL;
+ }
+ }
+ fclose(f);
+ if (cgroup == NULL) {
+ cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
+ (intmax_t)getpid());
+ goto exit_res;
+ }
+ snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us",
+ cgroup);
+ f = fopen(fname, "rt");
+ if (f == NULL) {
+ cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
+ "doesn't for '%s'", cgroup);
+ goto exit_res;
+ }
+ if (fscanf(f, "%d", &res) != 1) {
+ cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
+ } else {
+ cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
+ }
+ fclose(f);
+
+exit_res:
+ if (cgroup) {
+ free(cgroup);
+ }
+ return res;
+}
+
+/* stolen from corosync */
+static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
+ FILE *f;
+ int res = -1;
+
+ /*
+ * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+ * using systemd and systemd uses hardcoded path of cgroup mount point.
+ *
+ * This feature is expected to be removed as soon as systemd gets support
+ * for managing RT configuration.
+ */
+ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+ if (f == NULL) {
+ cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
+ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+ res = 0;
+ goto exit_res;
+ }
+ fclose(f);
+
+ if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
+ cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
+ "-> skip moving to root-slice");
+ res = 0;
+ goto exit_res;
+ }
+
+ f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
+ if (f == NULL) {
+ cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
+
+ goto exit_res;
+ }
+
+ if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
+ cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
+ goto close_and_exit_res;
+ }
+
+close_and_exit_res:
+ if (fclose(f) != 0) {
+ cl_log(LOG_WARNING, "Can't close cgroups tasks file");
+ goto exit_res;
+ }
+
+exit_res:
+ return (res);
+}
+
void
sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
{
@@ -670,6 +776,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
}
#ifdef SCHED_RR
+ if (move_to_root_cgroup) {
+ sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
+ }
+
{
int pcurrent = 0;
int pmin = sched_get_priority_min(SCHED_RR);
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index abde4e5..cef5cc7 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -33,6 +33,8 @@ int start_mode = 0;
char* pidfile = NULL;
bool do_flush = true;
char timeout_sysrq_char = 'b';
+bool move_to_root_cgroup = true;
+bool enforce_moving_to_root_cgroup = false;
int parse_device_line(const char *line);
@@ -965,6 +967,19 @@ int main(int argc, char **argv, char **envp)
timeout_action = strdup(value);
}
+ value = getenv("SBD_MOVE_TO_ROOT_CGROUP");
+ if(value) {
+ move_to_root_cgroup = crm_is_true(value);
+
+ if (move_to_root_cgroup) {
+ enforce_moving_to_root_cgroup = true;
+ } else {
+ if (strcmp(value, "auto") == 0) {
+ move_to_root_cgroup = true;
+ }
+ }
+ }
+
while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
switch (c) {
case 'D':
diff --git a/src/sbd.h b/src/sbd.h
index 3b05a11..ac30ec7 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default;
extern char* local_uname;
extern bool do_flush;
extern char timeout_sysrq_char;
+extern bool move_to_root_cgroup;
+extern bool enforce_moving_to_root_cgroup;
/* Global, non-tunable variables: */
extern int sector_size;
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index f163f21..e1a60ed 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5
#
SBD_TIMEOUT_ACTION=flush,reboot
+## Type: yesno / auto
+## Default: auto
+#
+# If CPUAccounting is enabled, the default is to assign no RT-budget
+# to the system.slice, which prevents sbd from running RR-scheduled.
+#
+# One way to avoid that issue is to move the sbd processes from the
+# slice they were originally started in to the root-slice.
+# Of course, starting sbd in a certain slice might be intentional.
+# Thus, in auto mode sbd checks whether its slice has RT-budget assigned.
+# If it has, sbd stays in that slice; otherwise it is moved
+# to the root-slice.
+SBD_MOVE_TO_ROOT_CGROUP=auto
+
## Type: string
## Default: ""
#
--
1.8.3.1

View File

@ -1,33 +0,0 @@
From e13297f45b4c5868800b1d3fc359bfd0723fcc5f Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 17 Sep 2018 23:13:37 +0200
Subject: [PATCH] Refactor: sbd-common: separate assignment and comparison
---
src/sbd-common.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index fcb7a31..679f946 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -376,12 +376,13 @@ watchdog_populate_list(void)
snprintf(entry_name, sizeof(entry_name),
SYS_CHAR_DEV_DIR "/%d:%d/device/driver",
major(watchdogs[i]), minor(watchdogs[i]));
- if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) {
+ len = readlink(entry_name, buf, sizeof(buf) - 1);
+ if (len > 0) {
buf[len] = '\0';
wdg->dev_driver = strdup(basename(buf));
} else if ((wdg->dev_ident) &&
- (strcmp(wdg->dev_ident,
- "Software Watchdog") == 0)) {
+ (strcmp(wdg->dev_ident,
+ "Software Watchdog") == 0)) {
wdg->dev_driver = strdup("softdog");
}
break;
--
1.8.3.1

View File

@ -0,0 +1,123 @@
From 1387ed890e3a9e246e9b9f780b2a7cb5379459ab Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 5 Jun 2019 11:32:49 +0200
Subject: [PATCH] Fix: sbd-cluster: periodically check corosync-daemon liveness
using votequorum_getinfo.
---
configure.ac | 12 +++++++++++-
src/sbd-cluster.c | 36 ++++++++++++++++++++++++++++++++++--
2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/configure.ac b/configure.ac
index fac26a8..c44e747 100644
--- a/configure.ac
+++ b/configure.ac
@@ -33,6 +33,7 @@ PKG_CHECK_MODULES(glib, [glib-2.0])
dnl PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc])
PKG_CHECK_MODULES(cmap, [libcmap], HAVE_cmap=1, HAVE_cmap=0)
+PKG_CHECK_MODULES(votequorum, [libvotequorum], HAVE_votequorum=1, HAVE_votequorum=0)
dnl pacemaker > 1.1.8
PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, HAVE_pacemaker=0)
@@ -49,7 +50,12 @@ elif test $HAVE_pacemaker = 1; then
if test $HAVE_cmap = 0; then
AC_MSG_NOTICE(No package 'cmap' found)
else
- CPPFLAGS="$CPPFLAGS $cmap_CFLAGS"
+ CPPFLAGS="$CPPFLAGS $cmap_CFLAGS"
+ fi
+ if test $HAVE_votequorum = 0; then
+ AC_MSG_NOTICE(No library 'votequorum' found)
+ else
+ CPPFLAGS="$CPPFLAGS $votequorum_CFLAGS"
fi
fi
@@ -66,6 +72,7 @@ AC_CHECK_LIB(pe_rules, test_rule, , missing="yes")
AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes")
AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes")
AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0)
+AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0)
dnl pacemaker >= 1.1.8
AC_CHECK_HEADERS(pacemaker/crm/cluster.h)
@@ -107,6 +114,9 @@ fi
AC_DEFINE_UNQUOTED(CHECK_TWO_NODE, $HAVE_cmap, Turn on checking for 2-node cluster)
AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1")
+AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle)
+AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1")
+
CONFIGDIR=""
AC_ARG_WITH(configdir,
[ --with-configdir=DIR
diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c
index 541212f..9fb6224 100644
--- a/src/sbd-cluster.c
+++ b/src/sbd-cluster.c
@@ -80,6 +80,12 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle,
#if SUPPORT_COROSYNC
+#if CHECK_VOTEQUORUM_HANDLE
+#include <corosync/votequorum.h>
+
+static votequorum_handle_t votequorum_handle = 0;
+#endif
+
static bool two_node = false;
static bool ever_seen_both = false;
static int cpg_membership_entries = -1;
@@ -261,12 +267,32 @@ notify_timer_cb(gpointer data)
#endif
case pcmk_cluster_corosync:
+ do {
+#if SUPPORT_COROSYNC && CHECK_VOTEQUORUM_HANDLE
+ struct votequorum_info info;
+
+ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) {
+
+ votequorum_finalize(votequorum_handle);
+ if (votequorum_initialize(&votequorum_handle, NULL) != CS_OK) {
+ votequorum_handle = 0;
+ break;
+ }
+ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) {
+ break;
+ }
+ }
+#endif
+ notify_parent();
+ } while (0);
+ break;
+
#if HAVE_DECL_PCMK_CLUSTER_CMAN
case pcmk_cluster_cman:
-#endif
- /* TODO - Make a CPG call and only call notify_parent() when we get a reply */
+
notify_parent();
break;
+#endif
default:
break;
@@ -533,6 +559,12 @@ find_pacemaker_remote(void)
static void
clean_up(int rc)
{
+#if CHECK_VOTEQUORUM_HANDLE
+ votequorum_finalize(votequorum_handle);
+ votequorum_handle = 0; /* there isn't really an invalid handle value
+ * just to be back where we started
+ */
+#endif
return;
}
--
1.8.3.1

View File

@ -1,214 +0,0 @@
From 5b4c866f7c0b4ef8061e131a1ee0d1c608d35054 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 19 Sep 2018 16:15:27 +0200
Subject: [PATCH] Fix: sbd-common: avoid statting potential links
These potential links might be anything and statting - if just
allowed to stat chr-nodes (e.g. SELinux) - them would lead
to avc-logs in the SELinux case.
---
src/sbd-common.c | 133 +++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 96 insertions(+), 37 deletions(-)
diff --git a/src/sbd-common.c b/src/sbd-common.c
index 679f946..cc84cd0 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -262,6 +262,12 @@ struct watchdog_list_item {
struct watchdog_list_item *next;
};
+struct link_list_item {
+ char *dev_node;
+ char *link_name;
+ struct link_list_item *next;
+};
+
static struct watchdog_list_item *watchdog_list = NULL;
static int watchdog_list_items = 0;
@@ -275,6 +281,7 @@ watchdog_populate_list(void)
char entry_name[280];
DIR *dp;
char buf[280] = "";
+ struct link_list_item *link_list = NULL;
if (watchdog_list != NULL) {
return;
@@ -288,7 +295,7 @@ watchdog_populate_list(void)
FILE *file;
snprintf(entry_name, sizeof(entry_name),
- SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name);
+ SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name);
file = fopen(entry_name, "r");
if (file) {
int major, minor;
@@ -309,43 +316,59 @@ watchdog_populate_list(void)
/* search for watchdog nodes in /dev */
dp = opendir(WATCHDOG_NODEDIR);
if (dp) {
+ /* first go for links and memorize them */
while ((entry = readdir(dp))) {
- if ((entry->d_type == DT_CHR) || (entry->d_type == DT_LNK)) {
- struct stat statbuf;
+ if (entry->d_type == DT_LNK) {
+ int len;
snprintf(entry_name, sizeof(entry_name),
- WATCHDOG_NODEDIR "%s", entry->d_name);
- if (entry->d_type == DT_LNK) {
- int len;
-
- /* !realpath(entry_name, buf) unfortunately does a stat on
- * target so we can't really use it to check if links stay
- * within /dev without triggering e.g. AVC-logs (with
- * SELinux policy that just allows stat within /dev).
- * Without canonicalization that doesn't actually touch the
- * filesystem easily available introduce some limitations
- * for simplicity:
- * - just simple path without '..'
- * - just one level of symlinks (avoid e.g. loop-checking)
- */
- len = readlink(entry_name, buf, sizeof(buf) - 1);
- if ((len < 1) ||
- (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) {
- continue;
- }
- buf[len] = '\0';
- if (buf[0] != '/') {
- memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1);
- memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN);
- len += WATCHDOG_NODEDIR_LEN;
- }
- if (strstr(buf, "/../") ||
- strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) ||
- lstat(buf, &statbuf) ||
- !S_ISCHR(statbuf.st_mode)) {
- continue;
- }
+ WATCHDOG_NODEDIR "%s", entry->d_name);
+
+ /* !realpath(entry_name, buf) unfortunately does a stat on
+ * target so we can't really use it to check if links stay
+ * within /dev without triggering e.g. AVC-logs (with
+ * SELinux policy that just allows stat within /dev).
+ * Without canonicalization that doesn't actually touch the
+ * filesystem easily available introduce some limitations
+ * for simplicity:
+ * - just simple path without '..'
+ * - just one level of symlinks (avoid e.g. loop-checking)
+ */
+ len = readlink(entry_name, buf, sizeof(buf) - 1);
+ if ((len < 1) ||
+ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) {
+ continue;
+ }
+ buf[len] = '\0';
+ if (buf[0] != '/') {
+ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1);
+ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN);
+ len += WATCHDOG_NODEDIR_LEN;
+ }
+ if (strstr(buf, "/../") ||
+ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN)) {
+ continue;
+ } else {
+ /* just memorize to avoid statting the target - SELinux */
+ struct link_list_item *lli =
+ calloc(1, sizeof(struct link_list_item));
+
+ lli->dev_node = strdup(buf);
+ lli->link_name = strdup(entry_name);
+ lli->next = link_list;
+ link_list = lli;
}
+ }
+ }
+
+ rewinddir(dp);
+
+ while ((entry = readdir(dp))) {
+ if (entry->d_type == DT_CHR) {
+ struct stat statbuf;
+
+ snprintf(entry_name, sizeof(entry_name),
+ WATCHDOG_NODEDIR "%s", entry->d_name);
if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) {
int i;
@@ -353,8 +376,9 @@ watchdog_populate_list(void)
if (statbuf.st_rdev == watchdogs[i]) {
int wdfd = watchdog_init_fd(entry_name, -1);
struct watchdog_list_item *wdg =
- calloc(1, sizeof(struct watchdog_list_item));
+ calloc(1, sizeof(struct watchdog_list_item));
int len;
+ struct link_list_item *tmp_list = NULL;
wdg->dev = watchdogs[i];
wdg->dev_node = strdup(entry_name);
@@ -374,8 +398,8 @@ watchdog_populate_list(void)
}
snprintf(entry_name, sizeof(entry_name),
- SYS_CHAR_DEV_DIR "/%d:%d/device/driver",
- major(watchdogs[i]), minor(watchdogs[i]));
+ SYS_CHAR_DEV_DIR "/%d:%d/device/driver",
+ major(watchdogs[i]), minor(watchdogs[i]));
len = readlink(entry_name, buf, sizeof(buf) - 1);
if (len > 0) {
buf[len] = '\0';
@@ -385,14 +409,49 @@ watchdog_populate_list(void)
"Software Watchdog") == 0)) {
wdg->dev_driver = strdup("softdog");
}
+
+ /* create dupes if we have memorized links
+ * to this node
+ */
+ for (tmp_list = link_list; tmp_list;
+ tmp_list = tmp_list->next) {
+ if (!strcmp(tmp_list->dev_node,
+ wdg->dev_node)) {
+ struct watchdog_list_item *dupe_wdg =
+ calloc(1, sizeof(struct watchdog_list_item));
+
+ /* as long as we never purge watchdog_list
+ * there is no need to dupe strings
+ */
+ *dupe_wdg = *wdg;
+ dupe_wdg->dev_node = strdup(tmp_list->link_name);
+ dupe_wdg->next = watchdog_list;
+ watchdog_list = dupe_wdg;
+ watchdog_list_items++;
+ }
+ /* for performance reasons we could remove
+ * the link_list entry
+ */
+ }
break;
}
}
}
}
}
+
closedir(dp);
}
+
+ /* cleanup link list */
+ while (link_list) {
+ struct link_list_item *tmp_list = link_list;
+
+ link_list = link_list->next;
+ free(tmp_list->dev_node);
+ free(tmp_list->link_name);
+ free(tmp_list);
+ }
}
int watchdog_info(void)
--
1.8.3.1

View File

@ -1,139 +0,0 @@
From a34cafa9d69194e3cbfe3af20ceb2d08848c483c Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 19 Nov 2018 20:56:35 +0100
Subject: [PATCH] Refactor: use pacemaker's new pe api with
constructors/destructors
For backward compatibility add some compatibility code
for if pe_new_working_set isn't available.
---
configure.ac | 3 +++
src/sbd-pacemaker.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/configure.ac b/configure.ac
index 1f328c2..1dc273b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,9 @@ dnl pacemaker-2.0 removed support for corosync 1 cluster layer
AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,,
[#include <pacemaker/crm/cluster.h>])
+dnl check for new pe-API
+AC_CHECK_FUNCS(pe_new_working_set)
+
if test "$missing" = "yes"; then
AC_MSG_ERROR([Missing required libraries or functions.])
fi
diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c
index a435d01..aac355a 100644
--- a/src/sbd-pacemaker.c
+++ b/src/sbd-pacemaker.c
@@ -58,6 +58,31 @@
#include "sbd.h"
+#ifndef HAVE_PE_NEW_WORKING_SET
+
+#define pe_reset_working_set(data_set) cleanup_calculations(data_set)
+
+static pe_working_set_t *
+pe_new_working_set()
+{
+ pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t));
+ if (data_set != NULL) {
+ set_working_set_defaults(data_set);
+ }
+ return data_set;
+}
+
+static void
+pe_free_working_set(pe_working_set_t *data_set)
+{
+ if (data_set != NULL) {
+ pe_reset_working_set(data_set);
+ free(data_set);
+ }
+}
+
+#endif
+
extern int disk_count;
static void clean_up(int rc);
@@ -74,6 +99,7 @@ static int cib_connected = 0;
static cib_t *cib = NULL;
static xmlNode *current_cib = NULL;
+static pe_working_set_t *data_set = NULL;
static long last_refresh = 0;
@@ -361,7 +387,6 @@ static gboolean
mon_refresh_state(gpointer user_data)
{
xmlNode *cib_copy = NULL;
- pe_working_set_t data_set;
if(current_cib == NULL) {
return FALSE;
@@ -382,14 +407,13 @@ mon_refresh_state(gpointer user_data)
} else {
last_refresh = time(NULL);
- set_working_set_defaults(&data_set);
- data_set.input = cib_copy;
- data_set.flags |= pe_flag_have_stonith_resource;
- cluster_status(&data_set);
+ data_set->input = cib_copy;
+ data_set->flags |= pe_flag_have_stonith_resource;
+ cluster_status(data_set);
- compute_status(&data_set);
+ compute_status(data_set);
- cleanup_calculations(&data_set);
+ pe_reset_working_set(data_set);
}
return FALSE;
@@ -398,6 +422,21 @@ mon_refresh_state(gpointer user_data)
static void
clean_up(int rc)
{
+ if (timer_id_reconnect > 0) {
+ g_source_remove(timer_id_reconnect);
+ timer_id_reconnect = 0;
+ }
+
+ if (timer_id_notify > 0) {
+ g_source_remove(timer_id_notify);
+ timer_id_notify = 0;
+ }
+
+ if (data_set != NULL) {
+ pe_free_working_set(data_set);
+ data_set = NULL;
+ }
+
if (cib != NULL) {
cib->cmds->signoff(cib);
cib_delete(cib);
@@ -425,6 +464,14 @@ servant_pcmk(const char *diskname, int mode, const void* argp)
set_crm_log_level(LOG_CRIT);
}
+
+ if (data_set == NULL) {
+ data_set = pe_new_working_set();
+ }
+ if (data_set == NULL) {
+ return -1;
+ }
+
if (current_cib == NULL) {
cib = cib_new();
--
1.8.3.1

View File

@ -0,0 +1,26 @@
From 0de14256fc873aee735117955662503b773bf71c Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 11 Jun 2019 08:05:33 +0200
Subject: [PATCH] build: say library when missing cmap not package to avoid
confusion
---
configure.ac | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/configure.ac b/configure.ac
index c44e747..1c55094 100644
--- a/configure.ac
+++ b/configure.ac
@@ -48,7 +48,7 @@ if test $HAVE_pacemaker = 0 -a $HAVE_pcmk = 0; then
elif test $HAVE_pacemaker = 1; then
CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS"
if test $HAVE_cmap = 0; then
- AC_MSG_NOTICE(No package 'cmap' found)
+ AC_MSG_NOTICE(No library 'cmap' found)
else
CPPFLAGS="$CPPFLAGS $cmap_CFLAGS"
fi
--
1.8.3.1

View File

@ -1,294 +0,0 @@
From b64c30af56e7eabd63ce1db25bc5ed9b953485af Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Fri, 23 Nov 2018 14:09:22 +0100
Subject: [PATCH] Feature: make timeout-action executed by sbd configurable
---
man/sbd.8.pod | 19 +++++++++++++++++++
src/sbd-common.c | 22 ++++++++++++++++------
src/sbd-inquisitor.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---
src/sbd-md.c | 2 +-
src/sbd.h | 3 +++
src/sbd.sysconfig | 18 ++++++++++++++++++
6 files changed, 107 insertions(+), 10 deletions(-)
diff --git a/man/sbd.8.pod b/man/sbd.8.pod
index ffd01c2..dbb3855 100644
--- a/man/sbd.8.pod
+++ b/man/sbd.8.pod
@@ -333,6 +333,23 @@ prevent a successful crashdump from ever being written.
Defaults to 240 seconds. Set to zero to disable.
+=item B<-r> I<N>
+
+Actions to be executed when the watchers don't report to the sbd
+master process in time, or when one of the watchers detects that the
+master process has died.
+
+Set timeout-action to a comma-separated combination of
+noflush|flush plus reboot|crashdump|off.
+If only one of the two is given, the other stays at its default.
+
+This doesn't affect actions like off, crashdump, reboot explicitly
+triggered via message slots.
+Nor does it configure the action a watchdog would trigger
+should it expire (there is no generic interface for that).
+
+Defaults to flush,reboot.
+
=back
=head2 allocate
@@ -552,6 +569,8 @@ options to pass to the daemon:
C<sbd> will fail to start if no C<SBD_DEVICE> is specified. See the
installed template for more options that can be configured here.
+In general configuration done via parameters takes precedence over
+the configuration from the configuration file.
=head2 Testing the sbd installation
diff --git a/src/sbd-common.c b/src/sbd-common.c
index cc84cd0..0e8be65 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -98,6 +98,8 @@ usage(void)
" (default is 1, set to 0 to disable)\n"
"-P Check Pacemaker quorum and node health (optional, watch only)\n"
"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
+"-r Set timeout-action to comma-separated combination of\n"
+" noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n"
"Commands:\n"
#if SUPPORT_SHARED_DISK
"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
@@ -769,7 +771,7 @@ sysrq_trigger(char t)
static void
-do_exit(char kind)
+do_exit(char kind, bool do_flush)
{
/* TODO: Turn debug_mode into a bit field? Delay + kdump for example */
const char *reason = NULL;
@@ -814,7 +816,9 @@ do_exit(char kind)
}
cl_log(LOG_EMERG, "Rebooting system: %s", reason);
- sync();
+ if (do_flush) {
+ sync();
+ }
if(kind == 'c') {
watchdog_close(true);
@@ -834,19 +838,25 @@ do_exit(char kind)
void
do_crashdump(void)
{
- do_exit('c');
+ do_exit('c', true);
}
void
do_reset(void)
{
- do_exit('b');
+ do_exit('b', true);
}
void
do_off(void)
{
- do_exit('o');
+ do_exit('o', true);
+}
+
+void
+do_timeout_action(void)
+{
+ do_exit(timeout_sysrq_char, do_flush);
}
/*
@@ -980,7 +990,7 @@ notify_parent(void)
/* Our parent died unexpectedly. Triggering
* self-fence. */
cl_log(LOG_WARNING, "Our parent is dead.");
- do_reset();
+ do_timeout_action();
}
switch (servant_health) {
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index 9b193d4..8e0bc87 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -31,6 +31,8 @@ int servant_restart_interval = 5;
int servant_restart_count = 1;
int start_mode = 0;
char* pidfile = NULL;
+bool do_flush = true;
+char timeout_sysrq_char = 'b';
int parse_device_line(const char *line);
@@ -655,7 +657,7 @@ void inquisitor_child(void)
/* At level 2 or above, we do nothing, but expect
* things to eventually return to
* normal. */
- do_reset();
+ do_timeout_action();
} else {
cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
}
@@ -668,7 +670,7 @@ void inquisitor_child(void)
if (debug_mode && watchdog_use) {
/* In debug mode, trigger a reset before the watchdog can panic the machine */
- do_reset();
+ do_timeout_action();
}
}
@@ -833,6 +835,7 @@ int main(int argc, char **argv, char **envp)
int qb_facility;
const char *value = NULL;
int start_delay = 0;
+ char *timeout_action = NULL;
if ((cmdname = strrchr(argv[0], '/')) == NULL) {
cmdname = argv[0];
@@ -928,7 +931,12 @@ int main(int argc, char **argv, char **envp)
}
cl_log(LOG_DEBUG, "Start delay: %d (%s)", (int)start_delay, value?value:"default");
- while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) {
+ value = getenv("SBD_TIMEOUT_ACTION");
+ if(value) {
+ timeout_action = strdup(value);
+ }
+
+ while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
switch (c) {
case 'D':
break;
@@ -1043,6 +1051,12 @@ int main(int argc, char **argv, char **envp)
cl_log(LOG_INFO, "Servant restart count set to %d",
(int)servant_restart_count);
break;
+ case 'r':
+ if (timeout_action) {
+ free(timeout_action);
+ }
+ timeout_action = strdup(optarg);
+ break;
case 'h':
usage();
return (0);
@@ -1101,6 +1115,39 @@ int main(int argc, char **argv, char **envp)
goto out;
}
+ if (timeout_action) {
+ char *p[2];
+ int i;
+ char c;
+ int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c);
+ bool parse_error = (nrflags < 1) || (nrflags > 2);
+
+ for (i = 0; (i < nrflags) && (i < 2); i++) {
+ if (!strcmp(p[i], "reboot")) {
+ timeout_sysrq_char = 'b';
+ } else if (!strcmp(p[i], "crashdump")) {
+ timeout_sysrq_char = 'c';
+ } else if (!strcmp(p[i], "off")) {
+ timeout_sysrq_char = 'o';
+ } else if (!strcmp(p[i], "flush")) {
+ do_flush = true;
+ } else if (!strcmp(p[i], "noflush")) {
+ do_flush = false;
+ } else {
+ parse_error = true;
+ }
+ free(p[i]);
+ }
+ if (parse_error) {
+ fprintf(stderr, "Failed to parse timeout-action \"%s\".\n",
+ timeout_action);
+ exit_status = -1;
+ goto out;
+ }
+ }
+ cl_log(LOG_NOTICE, "%s flush + writing \'%c\' to sysrq on timeout",
+ do_flush?"Doing":"Skipping", timeout_sysrq_char);
+
#if SUPPORT_SHARED_DISK
if (strcmp(argv[optind], "create") == 0) {
exit_status = init_devices(servants_leader);
diff --git a/src/sbd-md.c b/src/sbd-md.c
index a736118..579d273 100644
--- a/src/sbd-md.c
+++ b/src/sbd-md.c
@@ -1149,7 +1149,7 @@ int servant(const char *diskname, int mode, const void* argp)
if (ppid == 1) {
/* Our parent died unexpectedly. Triggering
* self-fence. */
- do_reset();
+ do_timeout_action();
}
/* These attempts are, by definition, somewhat racy. If
diff --git a/src/sbd.h b/src/sbd.h
index 0f8847a..386c85c 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -130,6 +130,7 @@ void sysrq_trigger(char t);
void do_crashdump(void);
void do_reset(void);
void do_off(void);
+void do_timeout_action(void);
pid_t make_daemon(void);
void maximize_priority(void);
void sbd_get_uname(void);
@@ -153,6 +154,8 @@ extern int debug_mode;
extern char *watchdogdev;
extern bool watchdogdev_is_default;
extern char* local_uname;
+extern bool do_flush;
+extern char timeout_sysrq_char;
/* Global, non-tunable variables: */
extern int sector_size;
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index c6d7c07..8f38426 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -71,6 +71,24 @@ SBD_WATCHDOG_DEV=/dev/watchdog
SBD_WATCHDOG_TIMEOUT=5
## Type: string
+## Default: "flush,reboot"
+#
+# Actions to be executed when the watchers don't report to the sbd
+# master process in time, or when one of the watchers detects that the
+# master process has died.
+#
+# Set timeout-action to a comma-separated combination of
+# noflush|flush plus reboot|crashdump|off.
+# If only one of the two is given, the other stays at its default.
+#
+# This doesn't affect actions like off, crashdump, reboot explicitly
+# triggered via message slots.
+# Nor does it configure the action a watchdog would trigger
+# should it expire (there is no generic interface for that).
+#
+SBD_TIMEOUT_ACTION=flush,reboot
+
+## Type: string
## Default: ""
#
# Additional options for starting sbd
--
1.8.3.1

View File

@ -15,39 +15,35 @@
# Please submit bugfixes or comments via http://bugs.opensuse.org/
#
%global commit a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f
%global commit 7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26
%global shortcommit %(c=%{commit}; echo ${c:0:7})
%global github_owner Clusterlabs
%global buildnum 18
%global buildnum 15
Name: sbd
Summary: Storage-based death
License: GPLv2+
Group: System Environment/Daemons
Version: 1.3.1
Release: %{buildnum}%{?dist}.3
Version: 1.4.0
Release: %{buildnum}%{?dist}
Url: https://github.com/%{github_owner}/%{name}
Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz
Patch0: 0001-make-pacemaker-dlm-wait-for-sbd-start.patch
Patch1: 0002-mention-timeout-caveat-with-SBD_DELAY_START.patch
Patch2: 0003-Doc-sbd.8.pod-add-query-test-watchdog.patch
Patch3: 0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch
Patch4: 0005-Fix-build-error-with-glibc-2.25.patch
Patch5: 0006-Fix-gcc-format-string-error.patch
Patch6: 0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch
Patch7: 0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch
Patch8: 0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch
Patch9: 0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch
Patch10: 0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch
Patch11: 0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch
Patch12: 0013-Refactor-sbd-common-separate-assignment-and-comparis.patch
Patch13: 0014-Fix-sbd-common-avoid-statting-potential-links.patch
Patch14: 0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch
Patch15: 0016-Feature-make-timeout-action-executed-by-sbd-configur.patch
Patch16: 0017-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch
Patch17: 0018-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch
Patch18: 0019-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch
Patch19: 0020-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch
Patch0: 0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch
Patch1: 0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch
Patch2: 0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch
Patch3: 0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch
Patch4: 0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch
Patch5: 0006-Refactor-fail-earlier-on-invalid-servants.patch
Patch6: 0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch
Patch7: 0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch
Patch8: 0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch
Patch9: 0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch
Patch10: 0011-Sanity-sbd-md-remove-some-left-over-code.patch
Patch11: 0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch
Patch12: 0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch
Patch13: 0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch
Patch14: 0015-build-say-library-when-missing-cmap-not-package-to-a.patch
Patch15: 0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: autoconf
BuildRequires: automake
@ -145,21 +141,51 @@ fi
%doc COPYING
%changelog
* Mon Aug 26 2019 Klaus Wenninger <kwenning@redhat.com> - 1.3.1-18.3
- added missing patch
Resolves: rhbz#1734061
* Fri Aug 16 2019 Klaus Wenninger <kwenning@redhat.com> - 1.3.1-18.2
* Fri Aug 16 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-15
- check for shutdown attribute on every cib-diff
Resolves: rhbz#1734061
Resolves: rhbz#1718296
* Tue Jul 30 2019 Klaus Wenninger <kwenning@redhat.com> - 1.3.1-18.1
* Wed Jun 12 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-10
- added missing patches to git
Resolves: rhbz#1702727
Resolves: rhbz#1718296
* Tue Jun 11 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-9
- assume graceful pacemaker exit if leftovers are unmanaged
- make handling of cib-connection loss more robust
- query corosync liveness via votequorum-api
Resolves: rhbz#1734061
Resolves: rhbz#1702727
Resolves: rhbz#1718296
* Mon Jun 3 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-8
- check for rt-budget > 0 and move to root-slice otherwise
Resolves: rhbz#1713021
* Wed Apr 10 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-7
- add some minor fixes from upstream found by coverity
Resolves: rhbz#1698056
* Wed Apr 10 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-6
- add decision-context to gating.yaml
Resolves: rhbz#1682137
* Mon Jan 14 2019 Klaus Wenninger <kwenning@redhat.com> - 1.4.0-5
- rebase to upstream v1.4.0
- finalize cmap connection if disconnected from cluster
- make handling of cib-connection loss more robust
- add ci test files
- use generic term cluster-services in doc
- stress in doc that on-disk metadata watchdog-timeout
takes precedence
- fail earlier on invalid servants to make gcc 9 happy
Resolves: rhbz#1698056
Resolves: rhbz#1682137
* Mon Dec 17 2018 Klaus Wenninger <kwenning@redhat.com> - 1.3.1-18
- make timeout-action executed by sbd configurable