From 7cb75f49d2b5a9fa354d76d5574c682fc2abf8d3 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 3 Nov 2020 07:11:00 -0500 Subject: [PATCH] import sbd-1.4.1-7.el8 --- ...acemaker-handle-new-no_quorum_demote.patch | 71 ++++ ...match-qdevice-sync_timeout-against-w.patch | 399 ++++++++++++++++++ ...r-sync-with-pacemakerd-for-robustnes.patch | 231 ++++++++++ ...-of-pacemaker-resource-startup-confi.patch | 110 +++++ SPECS/sbd.spec | 31 +- 5 files changed, 841 insertions(+), 1 deletion(-) create mode 100644 SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch create mode 100644 SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch create mode 100644 SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch create mode 100644 SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch diff --git a/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch b/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch new file mode 100644 index 0000000..10602b7 --- /dev/null +++ b/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch @@ -0,0 +1,71 @@ +From 3048119bf4a0ddb2da01d4ca827ae659a089b622 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 24 Jun 2020 14:33:21 +0200 +Subject: [PATCH] Fix: sbd-pacemaker: handle new no_quorum_demote + +and be robust against unknown no-quorum-policies handling them +as would be done with no_quorum_suicide +--- + configure.ac | 17 ++++++++++++++++- + src/sbd-pacemaker.c | 11 ++++++++++- + 2 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 02e2678..3391c5f 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -89,7 +89,22 @@ AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") + + dnl pacemaker-2.0 removed support for corosync 1 cluster layer + AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, +- [#include ]) ++ [#include ]) ++ ++dnl check for additional no-quorum-policies ++dnl 
AC_TEST_NO_QUORUM_POLICY(POLICY) ++AC_DEFUN([AC_TEST_NO_QUORUM_POLICY],[ ++ AC_MSG_CHECKING([whether enum pe_quorum_policy defines value $1]) ++ AC_LANG_PUSH([C]) ++ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( ++ [#include ], ++ [enum pe_quorum_policy policy = $1; return policy;])], ++ AC_DEFINE_UNQUOTED(m4_toupper(HAVE_ENUM_$1), 1, ++ [Does pe_types.h have $1 value in enum pe_quorum_policy?]) ++ AC_MSG_RESULT([yes]), AC_MSG_RESULT([no])) ++ AC_LANG_POP([C]) ++]) ++AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) + + dnl check for new pe-API + AC_CHECK_FUNCS(pe_new_working_set) +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 11e104d..6e53557 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -321,13 +321,22 @@ compute_status(pe_working_set_t * data_set) + case no_quorum_freeze: + set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Freeze resources"); + break; ++#if HAVE_ENUM_NO_QUORUM_DEMOTE ++ case no_quorum_demote: ++ set_servant_health(pcmk_health_transient, LOG_INFO, ++ "Quorum lost: Demote promotable resources and stop others"); ++ break; ++#endif + case no_quorum_stop: + set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Stop ALL resources"); + break; + case no_quorum_ignore: + set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Ignore"); + break; +- case no_quorum_suicide: ++ default: ++ /* immediate reboot is the most excessive action we take ++ use for no_quorum_suicide and everything we don't know yet ++ */ + set_servant_health(pcmk_health_unclean, LOG_INFO, "Quorum lost: Self-fence"); + break; + } +-- +1.8.3.1 + diff --git a/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch b/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch new file mode 100644 index 0000000..2a9b144 --- /dev/null +++ b/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch @@ -0,0 +1,399 @@ +From 4c3e4049b08799094a64dac289a48deef4d3d916 Mon Sep 17 00:00:00 2001 +From: 
Klaus Wenninger +Date: Fri, 24 Jul 2020 14:31:01 +0200 +Subject: [PATCH] Fix: sbd-cluster: match qdevice-sync_timeout against + wd-timeout + +--- + configure.ac | 13 +++ + src/sbd-cluster.c | 252 +++++++++++++++++++++++++++++++++++++++++++++--------- + 2 files changed, 223 insertions(+), 42 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 3391c5f..23547cf 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -109,6 +109,12 @@ AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) + dnl check for new pe-API + AC_CHECK_FUNCS(pe_new_working_set) + ++dnl check if votequorum comes with default for qdevice-sync_timeout ++AC_CHECK_DECLS([VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT], ++ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=1, ++ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=0, ++ [#include ]) ++ + if test "$missing" = "yes"; then + AC_MSG_ERROR([Missing required libraries or functions.]) + fi +@@ -140,6 +146,13 @@ AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") + AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) + AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") + ++AC_DEFINE_UNQUOTED(CHECK_QDEVICE_SYNC_TIMEOUT, ++ ($HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT && $HAVE_cmap), ++ Turn on checking if watchdog-timeout and qdevice-sync_timeout are matching) ++AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, ++ test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && ++ test "$HAVE_cmap" = "1") ++ + CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 13fa580..b6c5512 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -33,7 +33,7 @@ + #include + #include + +-#if CHECK_TWO_NODE ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + #include + #endif + +@@ -86,11 +86,20 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle, + static votequorum_handle_t votequorum_handle = 0; + #endif + ++#if 
CHECK_TWO_NODE + static bool two_node = false; ++#endif + static bool ever_seen_both = false; + static int cpg_membership_entries = -1; + +-#if CHECK_TWO_NODE ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++#include ++static bool using_qdevice = false; ++static uint32_t qdevice_sync_timeout = /* in seconds */ ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++#endif ++ ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + #include + + static cmap_handle_t cmap_handle = 0; +@@ -102,28 +111,59 @@ void + sbd_cpg_membership_health_update() + { + if(cpg_membership_entries > 0) { +- bool quorum_is_suspect = ++#if CHECK_TWO_NODE ++ bool quorum_is_suspect_two_node = + (two_node && ever_seen_both && cpg_membership_entries == 1); ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ bool quorum_is_suspect_qdevice_timing = ++ using_qdevice && (qdevice_sync_timeout > timeout_watchdog); ++#endif + +- if (!quorum_is_suspect) { ++ do { ++#if CHECK_TWO_NODE ++ if (quorum_is_suspect_two_node) { ++ /* Alternative would be asking votequorum for number of votes. ++ * Using pacemaker's cpg as source for number of active nodes ++ * avoids binding to an additional library, is definitely ++ * less code to write and we wouldn't have to combine data ++ * from 3 sources (cmap, cpg & votequorum) in a potentially ++ * racy environment. ++ */ ++ set_servant_health(pcmk_health_noquorum, LOG_WARNING, ++ "Connected to %s but requires both nodes present", ++ name_for_cluster_type(get_cluster_type()) ++ ); ++ break; ++ } ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (quorum_is_suspect_qdevice_timing) { ++ /* We can't really trust quorum info as qdevice-sync_timeout ++ * makes reaction of quorum too sluggish for our ++ * watchdog-timeout. 
++ */ ++ set_servant_health(pcmk_health_noquorum, LOG_WARNING, ++ "Connected to %s but quorum using qdevice is distrusted " ++ "for SBD as qdevice-sync_timeout (%ds) > watchdog-timeout " ++ "(%lus).", ++ name_for_cluster_type(get_cluster_type()), ++ qdevice_sync_timeout, timeout_watchdog ++ ); ++ break; ++ } ++#endif + set_servant_health(pcmk_health_online, LOG_INFO, +- "Connected to %s (%u members)", +- name_for_cluster_type(get_cluster_type()), +- cpg_membership_entries +- ); +- } else { +- /* Alternative would be asking votequorum for number of votes. +- * Using pacemaker's cpg as source for number of active nodes +- * avoids binding to an additional library, is definitely +- * less code to write and we wouldn't have to combine data +- * from 3 sources (cmap, cpq & votequorum) in a potentially +- * racy environment. +- */ +- set_servant_health(pcmk_health_noquorum, LOG_WARNING, +- "Connected to %s but requires both nodes present", +- name_for_cluster_type(get_cluster_type()) +- ); +- } ++ "Connected to %s (%u members)%s", ++ name_for_cluster_type(get_cluster_type()), ++ cpg_membership_entries, ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ using_qdevice?" 
using qdevice for quorum":"" ++#else ++ "" ++#endif ++ ); ++ } while (false); + + if (cpg_membership_entries > 1) { + ever_seen_both = true; +@@ -146,7 +186,7 @@ sbd_cpg_membership_dispatch(cpg_handle_t handle, + notify_parent(); + } + +-#if CHECK_TWO_NODE ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + static void sbd_cmap_notify_fn( + cmap_handle_t cmap_handle, + cmap_track_handle_t cmap_track_handle, +@@ -156,21 +196,99 @@ static void sbd_cmap_notify_fn( + struct cmap_notify_value old_val, + void *user_data) + { +- if (new_val.type == CMAP_VALUETYPE_UINT8) { +- switch (event) { +- case CMAP_TRACK_ADD: +- case CMAP_TRACK_MODIFY: +- two_node = *((uint8_t *) new_val.data); +- break; +- case CMAP_TRACK_DELETE: +- two_node = false; +- break; +- default: +- return; +- } +- sbd_cpg_membership_health_update(); +- notify_parent(); ++ switch (event) { ++ case CMAP_TRACK_ADD: ++ case CMAP_TRACK_MODIFY: ++ switch (new_val.type) { ++ case CMAP_VALUETYPE_UINT8: ++#if CHECK_TWO_NODE ++ if (!strcmp(key_name, "quorum.two_node")) { ++ two_node = *((uint8_t *) new_val.data); ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_STRING: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.model")) { ++ using_qdevice = ++ ((new_val.data) && strlen((char *) new_val.data)); ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_UINT32: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.sync_timeout")) { ++ if (new_val.data) { ++ qdevice_sync_timeout = ++ *((uint32_t *) new_val.data) / 1000; ++ } else { ++ qdevice_sync_timeout = ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++ } ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ default: ++ return; ++ } ++ break; ++ case CMAP_TRACK_DELETE: ++ switch (new_val.type) { ++ case CMAP_VALUETYPE_UINT8: ++#if CHECK_TWO_NODE ++ if (!strcmp(key_name, "quorum.two_node")) { ++ two_node = false; ++ } else { ++ 
return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_STRING: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.model")) { ++ using_qdevice = false; ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_UINT32: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.sync_timeout")) { ++ qdevice_sync_timeout = ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ default: ++ return; ++ } ++ break; ++ default: ++ return; + } ++ sbd_cpg_membership_health_update(); ++ notify_parent(); + } + + static gboolean +@@ -200,9 +318,14 @@ cmap_destroy(void) + } + + static gboolean +-sbd_get_two_node(void) ++verify_against_cmap_config(void) + { ++#if CHECK_TWO_NODE + uint8_t two_node_u8 = 0; ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ char *qdevice_model = NULL; ++#endif + int cmap_fd; + + if (!track_handle) { +@@ -211,12 +334,31 @@ sbd_get_two_node(void) + goto out; + } + ++#if CHECK_TWO_NODE + if (cmap_track_add(cmap_handle, "quorum.two_node", + CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, + sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { + cl_log(LOG_WARNING, "Failed adding CMAP tracker for 2Node-mode\n"); + goto out; + } ++#endif ++ ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (cmap_track_add(cmap_handle, "quorum.device.model", ++ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, ++ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { ++ cl_log(LOG_WARNING, "Failed adding CMAP tracker for qdevice-model\n"); ++ goto out; ++ } ++ ++ if (cmap_track_add(cmap_handle, "quorum.device.sync_timeout", ++ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, ++ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { ++ cl_log(LOG_WARNING, ++ "Failed adding CMAP tracker for qdevice-sync_timeout\n"); ++ goto out; ++ } ++#endif + + /* add the tracker to mainloop */ + if (cmap_fd_get(cmap_handle, &cmap_fd) != CS_OK) { +@@ 
-232,13 +374,39 @@ sbd_get_two_node(void) + g_source_attach(cmap_source, NULL); + } + +- if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) { ++#if CHECK_TWO_NODE ++ if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) ++ == CS_OK) { + cl_log(two_node_u8? LOG_NOTICE : LOG_INFO, + "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); + two_node = two_node_u8; + } else { + cl_log(LOG_INFO, "quorum.two_node not present in cmap\n"); + } ++#endif ++ ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (cmap_get_string(cmap_handle, "quorum.device.model", ++ &qdevice_model) == CS_OK) { ++ using_qdevice = qdevice_model && strlen(qdevice_model); ++ cl_log(using_qdevice? LOG_NOTICE : LOG_INFO, ++ "Corosync is%s using qdevice", using_qdevice?"":" not"); ++ } else { ++ cl_log(LOG_INFO, "quorum.device.model not present in cmap\n"); ++ } ++ ++ if (cmap_get_uint32(cmap_handle, "quorum.device.sync_timeout", ++ &qdevice_sync_timeout) == CS_OK) { ++ qdevice_sync_timeout /= 1000; ++ cl_log(LOG_INFO, ++ "Corosync is using qdevice-sync_timeout=%ds", ++ qdevice_sync_timeout); ++ } else { ++ cl_log(LOG_INFO, ++ "quorum.device.sync_timeout not present in cmap\n"); ++ } ++#endif ++ + return TRUE; + + out: +@@ -331,15 +499,15 @@ sbd_membership_connect(void) + } else { + cl_log(LOG_INFO, "Attempting connection to %s", name_for_cluster_type(stack)); + +-#if SUPPORT_COROSYNC && CHECK_TWO_NODE +- if (sbd_get_two_node()) { ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) ++ if (verify_against_cmap_config()) { + #endif + + if(crm_cluster_connect(&cluster)) { + connected = true; + } + +-#if SUPPORT_COROSYNC && CHECK_TWO_NODE ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) + } + #endif + } +@@ -362,7 +530,7 @@ sbd_membership_destroy(gpointer user_data) + cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); + + if (get_cluster_type() != pcmk_cluster_unknown) { +-#if SUPPORT_COROSYNC && 
CHECK_TWO_NODE ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) + cmap_destroy(); + #endif + } +-- +1.8.3.1 + diff --git a/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch b/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch new file mode 100644 index 0000000..6d920ab --- /dev/null +++ b/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch @@ -0,0 +1,231 @@ +From 5b5ffac4cce861f3621267a73d2ad29f6d807335 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Dec 2019 13:16:45 +0100 +Subject: [PATCH] Fix: sbd-pacemaker: sync with pacemakerd for robustness + +State query ping of pacemakerd prevents pacemakerd from +starting any sub-daemons (and thus services) if sbd can't +reach it via ipc. As a health-check get timestamp from +pacemakerd. On shutdown fetch info about graceful +shutdown from pacemakerd. +Use new pacemakerd-api provided by pacemaker. +--- + configure.ac | 4 ++ + src/sbd-pacemaker.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 126 insertions(+), 10 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 23547cf..11d12f0 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -81,6 +81,7 @@ AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") + AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") + AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) + AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) ++AC_CHECK_LIB(crmcommon, pcmk_pacemakerd_api_ping, HAVE_pacemakerd_api=1, HAVE_pacemakerd_api=0) + + dnl pacemaker >= 1.1.8 + AC_CHECK_HEADERS(crm/cluster.h) +@@ -153,6 +154,9 @@ AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, + test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && + test "$HAVE_cmap" = "1") + ++AC_DEFINE_UNQUOTED(USE_PACEMAKERD_API, $HAVE_pacemakerd_api, Turn on synchronization between sbd & pacemakerd) ++AM_CONDITIONAL(USE_PACEMAKERD_API, test "$HAVE_pacemakerd_api" = "1") ++ + 
CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 6e53557..1243bfc 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -83,6 +83,62 @@ pe_free_working_set(pe_working_set_t *data_set) + + #endif + ++static void clean_up(int rc); ++ ++#if USE_PACEMAKERD_API ++#include ++ ++static pcmk_ipc_api_t *pacemakerd_api = NULL; ++static time_t last_ok = (time_t) 0; ++ ++static void ++pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, ++ enum pcmk_ipc_event event_type, crm_exit_t status, ++ void *event_data, void *user_data) ++{ ++ pcmk_pacemakerd_api_reply_t *reply = event_data; ++ ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ /* Unexpected */ ++ cl_log(LOG_ERR, "Lost connection to pacemakerd\n"); ++ return; ++ ++ case pcmk_ipc_event_reply: ++ break; ++ ++ default: ++ return; ++ } ++ ++ if (status != CRM_EX_OK) { ++ cl_log(LOG_ERR, "Bad reply from pacemakerd: %s", ++ crm_exit_str(status)); ++ return; ++ } ++ ++ if (reply->reply_type != pcmk_pacemakerd_reply_ping) { ++ cl_log(LOG_ERR, "Unknown reply type %d from pacemakerd\n", ++ reply->reply_type); ++ } else { ++ if ((reply->data.ping.last_good != (time_t) 0) && ++ (reply->data.ping.status == pcmk_rc_ok)) { ++ switch (reply->data.ping.state) { ++ case pcmk_pacemakerd_state_running: ++ case pcmk_pacemakerd_state_shutting_down: ++ last_ok = reply->data.ping.last_good; ++ break; ++ case pcmk_pacemakerd_state_shutdown_complete: ++ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++#endif ++ + extern int disk_count; + + static void clean_up(int rc); +@@ -133,10 +189,13 @@ mon_cib_connection_destroy(gpointer user_data) + cib->cmds->signoff(cib); + /* retrigger as last one might have been skipped */ + mon_refresh_state(NULL); ++ ++#if !USE_PACEMAKERD_API + if (pcmk_clean_shutdown) { + /* assume a graceful pacemaker-shutdown */ + clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); + } 
++#endif + /* getting here we aren't sure about the pacemaker-state + so try to use the timeout to reconnect and get + everything sorted out again +@@ -196,6 +255,13 @@ mon_timer_notify(gpointer data) + g_source_remove(timer_id_notify); + } + ++#if USE_PACEMAKERD_API ++ { ++ time_t now = time(NULL); ++ ++ if ((last_ok <= now) && (now - last_ok < timeout_watchdog)) { ++#endif ++ + if (cib_connected) { + if (counter == counter_max) { + mon_retrieve_current_cib(); +@@ -207,6 +273,16 @@ mon_timer_notify(gpointer data) + counter++; + } + } ++ ++#if USE_PACEMAKERD_API ++ } ++ } ++ if (pcmk_connect_ipc(pacemakerd_api, ++ pcmk_ipc_dispatch_main) == pcmk_rc_ok) { ++ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); ++ } ++#endif ++ + timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); + return FALSE; + } +@@ -526,6 +602,14 @@ clean_up(int rc) + cib = NULL; + } + ++#if USE_PACEMAKERD_API ++ if (pacemakerd_api != NULL) { ++ pcmk_ipc_api_t *capi = pacemakerd_api; ++ pacemakerd_api = NULL; // Ensure we can't free this twice ++ pcmk_free_ipc_api(capi); ++ } ++#endif ++ + if (rc >= 0) { + exit(rc); + } +@@ -535,11 +619,11 @@ clean_up(int rc) + int + servant_pcmk(const char *diskname, int mode, const void* argp) + { +- int exit_code = 0; ++ int exit_code = 0; + +- crm_system_name = strdup("sbd:pcmk"); +- cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); +- set_proc_title("sbd: watcher: Pacemaker"); ++ crm_system_name = strdup("sbd:pcmk"); ++ cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); ++ set_proc_title("sbd: watcher: Pacemaker"); + setenv("PCMK_watchdog", "true", 1); + + if(debug == 0) { +@@ -548,12 +632,40 @@ servant_pcmk(const char *diskname, int mode, const void* argp) + } + + +- if (data_set == NULL) { +- data_set = pe_new_working_set(); +- } +- if (data_set == NULL) { +- return -1; +- } ++ if (data_set == NULL) { ++ data_set = pe_new_working_set(); ++ } ++ if (data_set == NULL) { ++ return -1; ++ } ++ ++#if USE_PACEMAKERD_API ++ { ++ 
int rc; ++ ++ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); ++ if (pacemakerd_api == NULL) { ++ cl_log(LOG_ERR, "Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ return -1; ++ } ++ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); ++ do { ++ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); ++ if (rc != pcmk_rc_ok) { ++ cl_log(LOG_DEBUG, "Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ sleep(reconnect_msec / 1000); ++ } ++ } while (rc != pcmk_rc_ok); ++ /* send a ping to pacemakerd to wake it up */ ++ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); ++ /* cib should come up now as well so it's time ++ * to have the inquisitor have a closer look ++ */ ++ notify_parent(); ++ } ++#endif + + if (current_cib == NULL) { + cib = cib_new(); +-- +1.8.3.1 + diff --git a/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch b/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch new file mode 100644 index 0000000..0c38862 --- /dev/null +++ b/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch @@ -0,0 +1,110 @@ +From f4d38a073ce3bfa2078792f1cc85229457430292 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 21 Jul 2020 18:30:30 +0200 +Subject: [PATCH] Fix: make syncing of pacemaker resource startup configurable + +--- + src/sbd-inquisitor.c | 20 ++++++++++++++++++++ + src/sbd-pacemaker.c | 6 +++--- + src/sbd.h | 1 + + src/sbd.sysconfig | 14 ++++++++++++++ + 4 files changed, 38 insertions(+), 3 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 52ede8a..962725e 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -35,6 +35,7 @@ bool do_flush = true; + char timeout_sysrq_char = 'b'; + bool move_to_root_cgroup = true; + bool enforce_moving_to_root_cgroup = false; ++bool sync_resource_startup = false; + + int parse_device_line(const char *line); + +@@ -964,6 +965,25 @@ int main(int 
argc, char **argv, char **envp) + } + } + ++ value = getenv("SBD_SYNC_RESOURCE_STARTUP"); ++ if(value) { ++ sync_resource_startup = crm_is_true(value); ++ } ++#if !USE_PACEMAKERD_API ++ if (sync_resource_startup) { ++ fprintf(stderr, "Failed to sync resource-startup as " ++ "SBD was built against pacemaker not supporting pacemakerd-API.\n"); ++ exit_status = -1; ++ goto out; ++ } ++#else ++ if (!sync_resource_startup) { ++ cl_log(LOG_WARNING, "SBD built against pacemaker supporting " ++ "pacemakerd-API. Should think about enabling " ++ "SBD_SYNC_RESOURCE_STARTUP."); ++ } ++#endif ++ + while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { + switch (c) { + case 'D': +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 1243bfc..aa1fb57 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -190,12 +190,12 @@ mon_cib_connection_destroy(gpointer user_data) + /* retrigger as last one might have been skipped */ + mon_refresh_state(NULL); + +-#if !USE_PACEMAKERD_API +- if (pcmk_clean_shutdown) { ++ ++ if ((pcmk_clean_shutdown) && (!sync_resource_startup)) { + /* assume a graceful pacemaker-shutdown */ + clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); + } +-#endif ++ + /* getting here we aren't sure about the pacemaker-state + so try to use the timeout to reconnect and get + everything sorted out again +diff --git a/src/sbd.h b/src/sbd.h +index 382e553..3b6647c 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -161,6 +161,7 @@ extern bool do_flush; + extern char timeout_sysrq_char; + extern bool move_to_root_cgroup; + extern bool enforce_moving_to_root_cgroup; ++extern bool sync_resource_startup; + + /* Global, non-tunable variables: */ + extern int sector_size; +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index 33b50d0..b32e826 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -106,6 +106,20 @@ SBD_TIMEOUT_ACTION=flush,reboot + # + SBD_MOVE_TO_ROOT_CGROUP=auto + ++## Type: yesno ++## Default: no ++# ++# If 
resource startup syncing is enabled then pacemakerd is ++# gonna wait to be pinged via IPC before it starts resources. ++# On shutdown pacemakerd is going to wait in a state where it ++# has cleanly shutdown resources till sbd fetches that state. ++# ++# Default is 'no' to prevent pacemaker from waiting for a ++# ping that will never come when working together with an sbd ++# version that doesn't support the feature. ++# ++SBD_SYNC_RESOURCE_STARTUP=no ++ + ## Type: string + ## Default: "" + # +-- +1.8.3.1 + diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec index eca754b..0a15574 100644 --- a/SPECS/sbd.spec +++ b/SPECS/sbd.spec @@ -18,7 +18,7 @@ %global commit 25fce8a7d5e8cd5abc2379077381b10bd6cec183 %global shortcommit %(c=%{commit}; echo ${c:0:7}) %global github_owner Clusterlabs -%global buildnum 3 +%global buildnum 7 Name: sbd Summary: Storage-based death @@ -30,6 +30,10 @@ Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz Patch1: 0001-Fix-regressions.sh-make-parameter-passing-consistent.patch Patch2: 0002-Doc-add-environment-section-to-man-page.patch +Patch3: 0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch +Patch4: 0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch +Patch5: 0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch +Patch6: 0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake @@ -45,6 +49,7 @@ BuildRequires: pkgconfig BuildRequires: systemd BuildRequires: make Conflicts: fence-agents-sbd < 4.2.1-38 +Conflicts: pacemaker-libs < 2.0.4-5 %if 0%{?rhel} > 0 ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 @@ -75,6 +80,7 @@ regression-testing sbd. 
sed -i src/sbd.sysconfig -e "s/Default: 5/Default: 15/" sed -i src/sbd.sysconfig -e "s/SBD_WATCHDOG_TIMEOUT=5/SBD_WATCHDOG_TIMEOUT=15/" %endif +sed -i src/sbd.sysconfig -e "s/SBD_SYNC_RESOURCE_STARTUP=no/SBD_SYNC_RESOURCE_STARTUP=yes/" ########################################################### @@ -155,6 +161,29 @@ fi %{_libdir}/libsbdtestbed* %changelog +* Thu Jul 30 2020 Klaus Wenninger - 1.4.1-7 +- conflict with pacemaker-libs < 2.0.4-5 instead of requiring + a minimum pacemaker version + + Resolves: rhbz#1861713 + +* Mon Jul 27 2020 Klaus Wenninger - 1.4.1-6 +- match qdevice-sync_timeout against wd-timeout +- sync startup/shutdown via pacemakerd-api + + Resolves: rhbz#1703128 + Resolves: rhbz#1743726 + +* Wed Jun 24 2020 Klaus Wenninger - 1.4.1-5 +- rebuild against pacemaker having new no_quorum_demote + + Resolves: rhbz#1850078 + +* Wed Jun 24 2020 Klaus Wenninger - 1.4.1-4 +- handle new no_quorum_demote in pacemaker + + Resolves: rhbz#1850078 + * Mon Feb 17 2020 Klaus Wenninger - 1.4.1-3 - append the man-page by a section auto-generated from sbd.sysconfig