diff --git a/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch b/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch deleted file mode 100644 index 6f17a5a..0000000 --- a/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 1d2a7b8d059d4f090b351b8decca0ddf274c82a0 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 20 Nov 2019 15:20:19 +0100 -Subject: [PATCH] Fix: regressions.sh: make parameter passing consistent - ---- - tests/regressions.sh | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/tests/regressions.sh b/tests/regressions.sh -index 6cfb303..7ab80be 100755 ---- a/tests/regressions.sh -+++ b/tests/regressions.sh -@@ -32,7 +32,7 @@ - : ${SBD_USE_DM:="yes"} - - sbd() { -- LD_PRELOAD=${SBD_PRELOAD} SBD_WATCHDOG_TIMEOUT=5 SBD_DEVICE="${SBD_DEVICE}" SBD_PRELOAD_LOG=${SBD_PRELOAD_LOG} SBD_WATCHDOG_DEV=/dev/watchdog setsid ${SBD_BINARY} -p ${SBD_PIDFILE} $* -+ LD_PRELOAD=${SBD_PRELOAD} SBD_WATCHDOG_TIMEOUT=5 SBD_DEVICE="${SBD_DEVICE}" SBD_PRELOAD_LOG=${SBD_PRELOAD_LOG} SBD_WATCHDOG_DEV=/dev/watchdog setsid ${SBD_BINARY} -p ${SBD_PIDFILE} "$@" - } - - sbd_wipe_disk() { -@@ -98,26 +98,26 @@ sbd_daemon_cleanup() { - pkill -TERM --pidfile ${SBD_PIDFILE} 2>/dev/null - sleep 5 - pkill -KILL --pidfile ${SBD_PIDFILE} 2>/dev/null -- pkill -KILL --parent $(cat ${SBD_PIDFILE} 2>/dev/null) 2>/dev/null -+ pkill -KILL --parent "$(cat ${SBD_PIDFILE} 2>/dev/null)" 2>/dev/null - echo > ${SBD_PIDFILE} - } - - _ok() { -- echo -- $@ -- $@ -+ echo "-- $*" -+ "$@" - rc=$? - if [ $rc -ne 0 ]; then -- echo "$@ failed with $rc" -+ echo "$* failed with $rc" - exit $rc - fi - } - - _no() { -- echo -- $@ -- $@ -+ echo "-- $*" -+ "$@" - rc=$? - if [ $rc -eq 0 ]; then -- echo "$@ did NOT fail ($rc)" -+ echo "$* did NOT fail ($rc)" - exit $rc - fi - return 0 -@@ -126,7 +126,7 @@ _no() { - _in_log() { - grep "$@" ${SBD_PRELOAD_LOG} >/dev/null - if [ $? -ne 0 ]; then -- echo "didn't find '$@' in log:" -+ echo "didn't find '$*' in log:" - cat ${SBD_PRELOAD_LOG} - sbd_daemon_cleanup - exit 1 -@@ -227,10 +227,10 @@ test_stall_inquisitor() { - sbd_daemon_cleanup - sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 watch - sleep 10 -- _ok kill -0 $(cat ${SBD_PIDFILE}) -- kill -STOP $(cat ${SBD_PIDFILE}) -+ _ok kill -0 "$(cat ${SBD_PIDFILE})" -+ kill -STOP "$(cat ${SBD_PIDFILE})" - sleep 10 -- kill -CONT $(cat ${SBD_PIDFILE}) 2>/dev/null -+ kill -CONT "$(cat ${SBD_PIDFILE})" 2>/dev/null - _in_log "watchdog fired" - } - --- -1.8.3.1 - diff --git a/0002-Doc-add-environment-section-to-man-page.patch b/0002-Doc-add-environment-section-to-man-page.patch deleted file mode 100644 index 2ad9556..0000000 --- a/0002-Doc-add-environment-section-to-man-page.patch +++ /dev/null @@ -1,1459 +0,0 @@ -From 9dd82a8b4daa5a7bd8ab3afa43b081f212efb1ac Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 29 Jan 2020 20:34:18 +0100 -Subject: [PATCH] Doc: add environment section to man-page - -Environment section is auto-generated from sbd.sysconfig. ---- - .gitignore | 1 + - Makefile.am | 6 +- - README.md | 3 +- - man/Makefile.am | 8 +- - man/sbd.8.pod | 668 ----------------------------------------------------- - man/sbd.8.pod.in | 675 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - src/sbd.sysconfig | 3 +- - 7 files changed, 690 insertions(+), 674 deletions(-) - delete mode 100644 man/sbd.8.pod - create mode 100644 man/sbd.8.pod.in - -diff --git a/Makefile.am b/Makefile.am -index 1c29f75..bd4346d 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -9,8 +9,8 @@ TARFILE = $(distdir).tar.gz - DIST_ARCHIVES = $(TARFILE) - KEEP_EXISTING_TAR = no - INJECT_GIT_COMMIT = yes --DISTCLEANFILES = sbd-* sbd-*/ - CLEANFILES = *.rpm *.tar.* sbd-* -+DISTCLEANFILES = sbd-* sbd-*/ - - RPM_ROOT = $(shell pwd) - RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ -@@ -31,7 +31,7 @@ export SBD_BINARY := src/sbd - export SBD_PRELOAD := tests/.libs/libsbdtestbed.so - export SBD_USE_DM := no - --EXTRA_DIST = sbd.spec tests/regressions.sh -+EXTRA_DIST = sbd.spec tests/regressions.sh man/sbd.8.pod.in - - export: - rm -f $(PACKAGE)-HEAD.tar.* -@@ -43,7 +43,7 @@ export: - echo `date`: Using existing tarball: $(TARFILE); \ - else \ - rm -f $(PACKAGE).tar.*; \ -- (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclusive="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ -+ (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclude="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude="*.pod" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ - if test -n "$$(git status -s)" || test "$(INJECT_GIT_COMMIT)" = "yes"; then \ - if test -n "$$(git status -s)"; then git diff HEAD --name-only|grep -v "^\."|xargs -n1 git diff HEAD > uncommitted.diff; fi; \ - rm -rf $(distdir); tar -xzf $(TARFILE); rm $(TARFILE); \ -diff --git a/README.md b/README.md -index d02a8bd..42a3fde 100644 ---- a/README.md -+++ b/README.md -@@ -5,5 +5,6 @@ A highly reliable fencing or Shoot-the-other-node-in-the-head (STONITH) mechanis - The component works with Pacemaker clusters, and is currently known to - compile and function on Pacemaker 1.1.7+ and corosync 1.4.x or 2.3.x. - --Please see https://github.com/l-mb/sbd/blob/master/man/sbd.8.pod for the full documentation. -+Please see https://github.com/clusterlabs/sbd/blob/master/man/sbd.8.pod.in & -+https://github.com/clusterlabs/sbd/blob/master/src/sbd.sysconfig for the full documentation. - -diff --git a/man/Makefile.am b/man/Makefile.am -index 3f89085..995712d 100644 ---- a/man/Makefile.am -+++ b/man/Makefile.am -@@ -1,6 +1,12 @@ - dist_man_MANS = sbd.8 - --EXTRA_DIST = sbd.8.pod -+DISTCLEANFILES = sbd.8.pod sbd.8 sbd.sysconfig.pod -+ -+sbd.sysconfig.pod: ../src/sbd.sysconfig -+ sed -r -n -e "s/^## Type: (.*)/Allows C<\1>/;t type;s/^## Default: (.*)/ defaulting to C<\1>/;t default;s/^#*(.*)=.*/=item B<\1>\n/;t variable;s/^#*//;s/^ *//;H;d;:type;h;d;:default;H;x;s/\n//;x;d;:variable;G;p" $< > $@ -+ -+sbd.8.pod: sbd.8.pod.in sbd.sysconfig.pod -+ sed -e "s/@environment_section@//;t insert;p;d;:insert;rsbd.sysconfig.pod" $< > $@ - - sbd.8: sbd.8.pod - @POD2MAN@ -s 8 -c "STONITH Block Device" -r "SBD" -n "SBD" $< $@ -diff --git a/man/sbd.8.pod b/man/sbd.8.pod -deleted file mode 100644 -index 377c579..0000000 ---- a/man/sbd.8.pod -+++ /dev/null -@@ -1,668 +0,0 @@ --=head1 NAME -- --sbd - STONITH Block Device daemon -- --=head1 SYNOPSIS -- --sbd <-d F> [options] C -- --=head1 SUMMARY -- --SBD provides a node fencing mechanism (Shoot the other node in the head, --STONITH) for Pacemaker-based clusters through the exchange of messages --via shared block storage such as for example a SAN, iSCSI, FCoE. This --isolates the fencing mechanism from changes in firmware version or --dependencies on specific firmware controllers, and it can be used as a --STONITH mechanism in all configurations that have reliable shared --storage. -- --SBD can also be used without any shared storage. In this mode, the --watchdog device will be used to reset the node if it loses quorum, if --any monitored daemon is lost and not recovered or if Pacemaker decides --that the node requires fencing. -- --The F binary implements both the daemon that watches the message --slots as well as the management tool for interacting with the block --storage device(s). This mode of operation is specified via the --C parameter; some of these modes take additional parameters. -- --To use SBD with shared storage, you must first C the messaging --layout on one to three block devices. Second, configure --F to list those devices (and possibly adjust other --options), and restart the cluster stack on each node to ensure that --C is started. Third, configure the C fencing --resource in the Pacemaker CIB. -- --Each of these steps is documented in more detail below the description --of the command options. -- --C can only be used as root. -- --=head2 GENERAL OPTIONS -- --=over -- --=item B<-d> F -- --Specify the block device(s) to be used. If you have more than one, --specify this option up to three times. This parameter is mandatory for --all modes, since SBD always needs a block device to interact with. -- --This man page uses F, F, and F as --example device names for brevity. However, in your production --environment, you should instead always refer to them by using the long, --stable device name (e.g., --F). -- --=item B<-v|-vv|-vvv> -- --Enable verbose|debug|debug-library logging (optional) -- --=item B<-h> -- --Display a concise summary of C options. -- --=item B<-n> I -- --Set local node name; defaults to C. This should not need to be --set. -- --=item B<-R> -- --Do B enable realtime priority. By default, C runs at realtime --priority, locks itself into memory, and also acquires highest IO --priority to protect itself against interference from other processes on --the system. This is a debugging-only option. -- --=item B<-I> I -- --Async IO timeout (defaults to 3 seconds, optional). You should not need --to adjust this unless your IO setup is really very slow. -- --(In daemon mode, the watchdog is refreshed when the majority of devices --could be read within this time.) -- --=back -- --=head2 create -- --Example usage: -- -- sbd -d /dev/sdc2 -d /dev/sdd3 create -- --If you specify the I command, sbd will write a metadata header --to the device(s) specified and also initialize the messaging slots for --up to 255 nodes. -- --B: This command will not prompt for confirmation. Roughly the --first megabyte of the specified block device(s) will be overwritten --immediately and without backup. -- --This command accepts a few options to adjust the default timings that --are written to the metadata (to ensure they are identical across all --nodes accessing the device). -- --=over -- --=item B<-1> I -- --Set watchdog timeout to N seconds. This depends mostly on your storage --latency; the majority of devices must be successfully read within this --time, or else the node will self-fence. -- --If your sbd device(s) reside on a multipath setup or iSCSI, this should --be the time required to detect a path failure. You may be able to reduce --this if your device outages are independent, or if you are using the --Pacemaker integration. -- --=item B<-2> I -- --Set slot allocation timeout to N seconds. You should not need to tune --this. -- --=item B<-3> I -- --Set daemon loop timeout to N seconds. You should not need to tune this. -- --=item B<-4> I -- --Set I timeout to N seconds. This should be twice the I --timeout. This is the time after which a message written to a node's slot --will be considered delivered. (Or long enough for the node to detect --that it needed to self-fence.) -- --This also affects the I in Pacemaker's CIB; see below. -- --=back -- --=head2 list -- --Example usage: -- -- # sbd -d /dev/sda1 list -- 0 hex-0 clear -- 1 hex-7 clear -- 2 hex-9 clear -- --List all allocated slots on device, and messages. You should see all --cluster nodes that have ever been started against this device. Nodes --that are currently running should have a I state; nodes that have --been fenced, but not yet restarted, will show the appropriate fencing --message. -- --=head2 dump -- --Example usage: -- -- # sbd -d /dev/sda1 dump -- ==Dumping header on disk /dev/sda1 -- Header version : 2 -- Number of slots : 255 -- Sector size : 512 -- Timeout (watchdog) : 15 -- Timeout (allocate) : 2 -- Timeout (loop) : 1 -- Timeout (msgwait) : 30 -- ==Header on disk /dev/sda1 is dumped -- --Dump meta-data header from device. -- --=head2 watch -- --Example usage: -- -- sbd -d /dev/sdc2 -d /dev/sdd3 -P watch -- --This command will make C start in daemon mode. It will constantly monitor --the message slot of the local node for incoming messages, reachability, and --optionally take Pacemaker's state into account. -- --C B be started on boot before the cluster stack! See below --for enabling this according to your boot environment. -- --The options for this mode are rarely specified directly on the --commandline directly, but most frequently set via F. -- --It also constantly monitors connectivity to the storage device, and --self-fences in case the partition becomes unreachable, guaranteeing that it --does not disconnect from fencing messages. -- --A node slot is automatically allocated on the device(s) the first time --the daemon starts watching the device; hence, manual allocation is not --usually required. -- --If a watchdog is used together with the C as is strongly --recommended, the watchdog is activated at initial start of the sbd --daemon. The watchdog is refreshed every time the majority of SBD devices --has been successfully read. Using a watchdog provides additional --protection against C crashing. -- --If the Pacemaker integration is activated, C will B self-fence --if device majority is lost, if: -- --=over -- --=item 1. -- --The partition the node is in is still quorate according to the CIB; -- --=item 2. -- --it is still quorate according to Corosync's node count; -- --=item 3. -- --the node itself is considered online and healthy by Pacemaker. -- --=back -- --This allows C to survive temporary outages of the majority of --devices. However, while the cluster is in such a degraded state, it can --neither successfully fence nor be shutdown cleanly (as taking the --cluster below the quorum threshold will immediately cause all remaining --nodes to self-fence). In short, it will not tolerate any further faults. --Please repair the system before continuing. -- --There is one C process that acts as a master to which all watchers --report; one per device to monitor the node's slot; and, optionally, one --that handles the Pacemaker integration. -- --=over -- --=item B<-W> -- --Enable or disable use of the system watchdog to protect against the sbd --processes failing and the node being left in an undefined state. Specify --this once to enable, twice to disable. -- --Defaults to I. -- --=item B<-w> F -- --This can be used to override the default watchdog device used and should not --usually be necessary. -- --=item B<-p> F -- --This option can be used to specify a pidfile for the main sbd process. -- --=item B<-F> I -- --Number of failures before a failing servant process will not be restarted --immediately until the dampening delay has expired. If set to zero, servants --will be restarted immediately and indefinitely. If set to one, a failed --servant will be restarted once every B<-t> seconds. If set to a different --value, the servant will be restarted that many times within the dampening --period and then delay. -- --Defaults to I<1>. -- --=item B<-t> I -- --Dampening delay before faulty servants are restarted. Combined with C<-F 1>, --the most logical way to tune the restart frequency of servant processes. --Default is 5 seconds. -- --If set to zero, processes will be restarted indefinitely and immediately. -- --=item B<-P> -- --Enable Pacemaker integration which checks Pacemaker quorum and node health. --Specify this once to enable, twice to disable. -- --Defaults to I. -- --=item B<-S> I -- --Set the start mode. (Defaults to I<0>.) -- --If this is set to zero, sbd will always start up unconditionally, --regardless of whether the node was previously fenced or not. -- --If set to one, sbd will only start if the node was previously shutdown --cleanly (as indicated by an exit request message in the slot), or if the --slot is empty. A reset, crashdump, or power-off request in any slot will --halt the start up. -- --This is useful to prevent nodes from rejoining if they were faulty. The --node must be manually "unfenced" by sending an empty message to it: -- -- sbd -d /dev/sda1 message node1 clear -- --=item B<-s> I -- --Set the start-up wait time for devices. (Defaults to I<120>.) -- --Dynamic block devices such as iSCSI might not be fully initialized and --present yet. This allows one to set a timeout for waiting for devices to --appear on start-up. If set to 0, start-up will be aborted immediately if --no devices are available. -- --=item B<-Z> -- --Enable trace mode. B Specifying this once will turn all reboots or power-offs, be --they caused by self-fence decisions or messages, into a crashdump. --Specifying this twice will just log them but not continue running. -- --=item B<-T> -- --By default, the daemon will set the watchdog timeout as specified in the --device metadata. However, this does not work for every watchdog device. --In this case, you must manually ensure that the watchdog timeout used by --the system correctly matches the SBD settings, and then specify this --option to allow C to continue with start-up. -- --=item B<-5> I -- --Warn if the time interval for tickling the watchdog exceeds this many seconds. --Since the node is unable to log the watchdog expiry (it reboots immediately --without a chance to write its logs to disk), this is very useful for getting --an indication that the watchdog timeout is too short for the IO load of the --system. -- --Default is 3 seconds, set to zero to disable. -- --=item B<-C> I -- --Watchdog timeout to set before crashdumping. If SBD is set to crashdump --instead of reboot - either via the trace mode settings or the I --fencing agent's parameter -, SBD will adjust the watchdog timeout to this --setting before triggering the dump. Otherwise, the watchdog might trigger and --prevent a successful crashdump from ever being written. -- --Set to zero (= default) to disable. -- --=item B<-r> I -- --Actions to be executed when the watchers don't timely report to the sbd --master process or one of the watchers detects that the master process --has died. -- --Set timeout-action to comma-separated combination of --noflush|flush plus reboot|crashdump|off. --If just one of both is given the other stays at the default. -- --This doesn't affect actions like off, crashdump, reboot explicitly --triggered via message slots. --And it does as well not configure the action a watchdog would --trigger should it run off (there is no generic interface). -- --Defaults to flush,reboot. -- --=back -- --=head2 allocate -- --Example usage: -- -- sbd -d /dev/sda1 allocate node1 -- --Explicitly allocates a slot for the specified node name. This should --rarely be necessary, as every node will automatically allocate itself a --slot the first time it starts up on watch mode. -- --=head2 message -- --Example usage: -- -- sbd -d /dev/sda1 message node1 test -- --Writes the specified message to node's slot. This is rarely done --directly, but rather abstracted via the C fencing agent --configured as a cluster resource. -- --Supported message types are: -- --=over -- --=item test -- --This only generates a log message on the receiving node and can be used --to check if SBD is seeing the device. Note that this could overwrite a --fencing request send by the cluster, so should not be used during --production. -- --=item reset -- --Reset the target upon receipt of this message. -- --=item off -- --Power-off the target. -- --=item crashdump -- --Cause the target node to crashdump. -- --=item exit -- --This will make the C daemon exit cleanly on the target. You should --B send this message manually; this is handled properly during --shutdown of the cluster stack. Manually stopping the daemon means the --node is unprotected! -- --=item clear -- --This message indicates that no real message has been sent to the node. --You should not set this manually; C will clear the message slot --automatically during start-up, and setting this manually could overwrite --a fencing message by the cluster. -- --=back -- --=head2 query-watchdog -- --Example usage: -- -- sbd query-watchdog -- --Check for available watchdog devices and print some info. -- --B: This command will arm the watchdog during query, and if your --watchdog refuses disarming (for example, if its kernel module has the --'nowayout' parameter set) this will reset your system. -- --=head2 test-watchdog -- --Example usage: -- -- sbd test-watchdog [-w /dev/watchdog3] -- --Test specified watchdog device (/dev/watchdog by default). -- --B: This command will arm the watchdog and have your system reset --in case your watchdog is working properly! If issued from an interactive --session, it will prompt for confirmation. -- --=head1 Base system configuration -- --=head2 Configure a watchdog -- --It is highly recommended that you configure your Linux system to load a --watchdog driver with hardware assistance (as is available on most modern --systems), such as I, I, or others. As a fall-back, you --can use the I module. -- --No other software must access the watchdog timer; it can only be --accessed by one process at any given time. Some hardware vendors ship --systems management software that use the watchdog for system resets --(f.e. HP ASR daemon). Such software has to be disabled if the watchdog --is to be used by SBD. -- --=head2 Choosing and initializing the block device(s) -- --First, you have to decide if you want to use one, two, or three devices. -- --If you are using multiple ones, they should reside on independent --storage setups. Putting all three of them on the same logical unit for --example would not provide any additional redundancy. -- --The SBD device can be connected via Fibre Channel, Fibre Channel over --Ethernet, or even iSCSI. Thus, an iSCSI target can become a sort-of --network-based quorum server; the advantage is that it does not require --a smart host at your third location, just block storage. -- --The SBD partitions themselves B be mirrored (via MD, --DRBD, or the storage layer itself), since this could result in a --split-mirror scenario. Nor can they reside on cLVM2 volume groups, since --they must be accessed by the cluster stack before it has started the --cLVM2 daemons; hence, these should be either raw partitions or logical --units on (multipath) storage. -- --The block device(s) must be accessible from all nodes. (While it is not --necessary that they share the same path name on all nodes, this is --considered a very good idea.) -- --SBD will only use about one megabyte per device, so you can easily --create a small partition, or very small logical units. (The size of the --SBD device depends on the block size of the underlying device. Thus, 1MB --is fine on plain SCSI devices and SAN storage with 512 byte blocks. On --the IBM s390x architecture in particular, disks default to 4k blocks, --and thus require roughly 4MB.) -- --The number of devices will affect the operation of SBD as follows: -- --=over -- --=item One device -- --In its most simple implementation, you use one device only. This is --appropriate for clusters where all your data is on the same shared --storage (with internal redundancy) anyway; the SBD device does not --introduce an additional single point of failure then. -- --If the SBD device is not accessible, the daemon will fail to start and --inhibit startup of cluster services. -- --=item Two devices -- --This configuration is a trade-off, primarily aimed at environments where --host-based mirroring is used, but no third storage device is available. -- --SBD will not commit suicide if it loses access to one mirror leg; this --allows the cluster to continue to function even in the face of one outage. -- --However, SBD will not fence the other side while only one mirror leg is --available, since it does not have enough knowledge to detect an asymmetric --split of the storage. So it will not be able to automatically tolerate a --second failure while one of the storage arrays is down. (Though you --can use the appropriate crm command to acknowledge the fence manually.) -- --It will not start unless both devices are accessible on boot. -- --=item Three devices -- --In this most reliable and recommended configuration, SBD will only --self-fence if more than one device is lost; hence, this configuration is --resilient against temporary single device outages (be it due to failures --or maintenance). Fencing messages can still be successfully relayed if --at least two devices remain accessible. -- --This configuration is appropriate for more complex scenarios where --storage is not confined to a single array. For example, host-based --mirroring solutions could have one SBD per mirror leg (not mirrored --itself), and an additional tie-breaker on iSCSI. -- --It will only start if at least two devices are accessible on boot. -- --=back -- --After you have chosen the devices and created the appropriate partitions --and perhaps multipath alias names to ease management, use the C --command described above to initialize the SBD metadata on them. -- --=head3 Sharing the block device(s) between multiple clusters -- --It is possible to share the block devices between multiple clusters, --provided the total number of nodes accessing them does not exceed I<255> --nodes, and they all must share the same SBD timeouts (since these are --part of the metadata). -- --If you are using multiple devices this can reduce the setup overhead --required. However, you should B share devices between clusters in --different security domains. -- --=head2 Configure SBD to start on boot -- --On systems using C, the C or C system --start-up scripts must handle starting or stopping C as required --before starting the rest of the cluster stack. -- --For C, sbd simply has to be enabled using -- -- systemctl enable sbd.service -- --The daemon is brought online on each node before corosync and Pacemaker --are started, and terminated only after all other cluster components have --been shut down - ensuring that cluster resources are never activated --without SBD supervision. -- --=head2 Configuration via sysconfig -- --The system instance of C is configured via F. --In this file, you must specify the device(s) used, as well as any --options to pass to the daemon: -- -- SBD_DEVICE="/dev/sda1;/dev/sdb1;/dev/sdc1" -- SBD_PACEMAKER="true" -- --C will fail to start if no C is specified. See the --installed template for more options that can be configured here. --In general configuration done via parameters takes precedence over --the configuration from the configuration file. -- --=head2 Testing the sbd installation -- --After a restart of the cluster stack on this node, you can now try --sending a test message to it as root, from this or any other node: -- -- sbd -d /dev/sda1 message node1 test -- --The node will acknowledge the receipt of the message in the system logs: -- -- Aug 29 14:10:00 node1 sbd: [13412]: info: Received command test from node2 -- --This confirms that SBD is indeed up and running on the node, and that it --is ready to receive messages. -- --Make B that F is identical on all cluster --nodes, and that all cluster nodes are running the daemon. -- --=head1 Pacemaker CIB integration -- --=head2 Fencing resource -- --Pacemaker can only interact with SBD to issue a node fence if there is a --configure fencing resource. This should be a primitive, not a clone, as --follows: -- -- primitive fencing-sbd stonith:external/sbd \ -- params pcmk_delay_max=30 -- --This will automatically use the same devices as configured in --F. -- --While you should not configure this as a clone (as Pacemaker will register --the fencing device on each node automatically), the I --setting enables random fencing delay which ensures, in a scenario where a --split-brain scenario did occur in a two node cluster, that one of the nodes --has a better chance to survive to avoid double fencing. -- --SBD also supports turning the reset request into a crash request, which --may be helpful for debugging if you have kernel crashdumping configured; --then, every fence request will cause the node to dump core. You can --enable this via the C parameter on the fencing --resource. This is B recommended for production use, but only for --debugging phases. -- --=head2 General cluster properties -- --You must also enable STONITH in general, and set the STONITH timeout to --be at least twice the I timeout you have configured, to allow --enough time for the fencing message to be delivered. If your I --timeout is 60 seconds, this is a possible configuration: -- -- property stonith-enabled="true" -- property stonith-timeout="120s" -- --B: if I is too low for I and the --system overhead, sbd will never be able to successfully complete a fence --request. This will create a fencing loop. -- --Note that the sbd fencing agent will try to detect this and --automatically extend the I setting to a reasonable --value, on the assumption that sbd modifying your configuration is --preferable to not fencing. -- --=head1 Management tasks -- --=head2 Recovering from temporary SBD device outage -- --If you have multiple devices, failure of a single device is not immediately --fatal. C will retry to restart the monitor for the device every 5 --seconds by default. However, you can tune this via the options to the --I command. -- --In case you wish the immediately force a restart of all currently --disabled monitor processes, you can send a I to the SBD --I process. -- -- --=head1 LICENSE -- --Copyright (C) 2008-2013 Lars Marowsky-Bree -- --This program is free software; you can redistribute it and/or --modify it under the terms of the GNU General Public --License as published by the Free Software Foundation; either --version 2 of the License, or (at your option) any later version. -- --This software is distributed in the hope that it will be useful, --but WITHOUT ANY WARRANTY; without even the implied warranty of --MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --General Public License for more details. -- --For details see the GNU General Public License at --http://www.gnu.org/licenses/gpl-2.0.html (version 2) and/or --http://www.gnu.org/licenses/gpl.html (the newest as per "any later"). -diff --git a/man/sbd.8.pod.in b/man/sbd.8.pod.in -new file mode 100644 -index 0000000..ff89c82 ---- /dev/null -+++ b/man/sbd.8.pod.in -@@ -0,0 +1,675 @@ -+=head1 NAME -+ -+sbd - STONITH Block Device daemon -+ -+=head1 SYNOPSIS -+ -+sbd <-d F> [options] C -+ -+=head1 SUMMARY -+ -+SBD provides a node fencing mechanism (Shoot the other node in the head, -+STONITH) for Pacemaker-based clusters through the exchange of messages -+via shared block storage such as for example a SAN, iSCSI, FCoE. This -+isolates the fencing mechanism from changes in firmware version or -+dependencies on specific firmware controllers, and it can be used as a -+STONITH mechanism in all configurations that have reliable shared -+storage. -+ -+SBD can also be used without any shared storage. In this mode, the -+watchdog device will be used to reset the node if it loses quorum, if -+any monitored daemon is lost and not recovered or if Pacemaker decides -+that the node requires fencing. -+ -+The F binary implements both the daemon that watches the message -+slots as well as the management tool for interacting with the block -+storage device(s). This mode of operation is specified via the -+C parameter; some of these modes take additional parameters. -+ -+To use SBD with shared storage, you must first C the messaging -+layout on one to three block devices. Second, configure -+F to list those devices (and possibly adjust other -+options), and restart the cluster stack on each node to ensure that -+C is started. Third, configure the C fencing -+resource in the Pacemaker CIB. -+ -+Each of these steps is documented in more detail below the description -+of the command options. -+ -+C can only be used as root. -+ -+=head2 GENERAL OPTIONS -+ -+=over -+ -+=item B<-d> F -+ -+Specify the block device(s) to be used. If you have more than one, -+specify this option up to three times. This parameter is mandatory for -+all modes, since SBD always needs a block device to interact with. -+ -+This man page uses F, F, and F as -+example device names for brevity. However, in your production -+environment, you should instead always refer to them by using the long, -+stable device name (e.g., -+F). -+ -+=item B<-v|-vv|-vvv> -+ -+Enable verbose|debug|debug-library logging (optional) -+ -+=item B<-h> -+ -+Display a concise summary of C options. -+ -+=item B<-n> I -+ -+Set local node name; defaults to C. This should not need to be -+set. -+ -+=item B<-R> -+ -+Do B enable realtime priority. By default, C runs at realtime -+priority, locks itself into memory, and also acquires highest IO -+priority to protect itself against interference from other processes on -+the system. This is a debugging-only option. -+ -+=item B<-I> I -+ -+Async IO timeout (defaults to 3 seconds, optional). You should not need -+to adjust this unless your IO setup is really very slow. -+ -+(In daemon mode, the watchdog is refreshed when the majority of devices -+could be read within this time.) -+ -+=back -+ -+=head2 create -+ -+Example usage: -+ -+ sbd -d /dev/sdc2 -d /dev/sdd3 create -+ -+If you specify the I command, sbd will write a metadata header -+to the device(s) specified and also initialize the messaging slots for -+up to 255 nodes. -+ -+B: This command will not prompt for confirmation. Roughly the -+first megabyte of the specified block device(s) will be overwritten -+immediately and without backup. -+ -+This command accepts a few options to adjust the default timings that -+are written to the metadata (to ensure they are identical across all -+nodes accessing the device). -+ -+=over -+ -+=item B<-1> I -+ -+Set watchdog timeout to N seconds. This depends mostly on your storage -+latency; the majority of devices must be successfully read within this -+time, or else the node will self-fence. -+ -+If your sbd device(s) reside on a multipath setup or iSCSI, this should -+be the time required to detect a path failure. You may be able to reduce -+this if your device outages are independent, or if you are using the -+Pacemaker integration. -+ -+=item B<-2> I -+ -+Set slot allocation timeout to N seconds. You should not need to tune -+this. -+ -+=item B<-3> I -+ -+Set daemon loop timeout to N seconds. You should not need to tune this. -+ -+=item B<-4> I -+ -+Set I timeout to N seconds. This should be twice the I -+timeout. This is the time after which a message written to a node's slot -+will be considered delivered. (Or long enough for the node to detect -+that it needed to self-fence.) -+ -+This also affects the I in Pacemaker's CIB; see below. -+ -+=back -+ -+=head2 list -+ -+Example usage: -+ -+ # sbd -d /dev/sda1 list -+ 0 hex-0 clear -+ 1 hex-7 clear -+ 2 hex-9 clear -+ -+List all allocated slots on device, and messages. You should see all -+cluster nodes that have ever been started against this device. Nodes -+that are currently running should have a I state; nodes that have -+been fenced, but not yet restarted, will show the appropriate fencing -+message. -+ -+=head2 dump -+ -+Example usage: -+ -+ # sbd -d /dev/sda1 dump -+ ==Dumping header on disk /dev/sda1 -+ Header version : 2 -+ Number of slots : 255 -+ Sector size : 512 -+ Timeout (watchdog) : 15 -+ Timeout (allocate) : 2 -+ Timeout (loop) : 1 -+ Timeout (msgwait) : 30 -+ ==Header on disk /dev/sda1 is dumped -+ -+Dump meta-data header from device. -+ -+=head2 watch -+ -+Example usage: -+ -+ sbd -d /dev/sdc2 -d /dev/sdd3 -P watch -+ -+This command will make C start in daemon mode. It will constantly monitor -+the message slot of the local node for incoming messages, reachability, and -+optionally take Pacemaker's state into account. -+ -+C B be started on boot before the cluster stack! See below -+for enabling this according to your boot environment. -+ -+The options for this mode are rarely specified directly on the -+commandline directly, but most frequently set via F. -+ -+It also constantly monitors connectivity to the storage device, and -+self-fences in case the partition becomes unreachable, guaranteeing that it -+does not disconnect from fencing messages. -+ -+A node slot is automatically allocated on the device(s) the first time -+the daemon starts watching the device; hence, manual allocation is not -+usually required. -+ -+If a watchdog is used together with the C as is strongly -+recommended, the watchdog is activated at initial start of the sbd -+daemon. The watchdog is refreshed every time the majority of SBD devices -+has been successfully read. Using a watchdog provides additional -+protection against C crashing. -+ -+If the Pacemaker integration is activated, C will B self-fence -+if device majority is lost, if: -+ -+=over -+ -+=item 1. -+ -+The partition the node is in is still quorate according to the CIB; -+ -+=item 2. -+ -+it is still quorate according to Corosync's node count; -+ -+=item 3. -+ -+the node itself is considered online and healthy by Pacemaker. -+ -+=back -+ -+This allows C to survive temporary outages of the majority of -+devices. However, while the cluster is in such a degraded state, it can -+neither successfully fence nor be shutdown cleanly (as taking the -+cluster below the quorum threshold will immediately cause all remaining -+nodes to self-fence). In short, it will not tolerate any further faults. -+Please repair the system before continuing. -+ -+There is one C process that acts as a master to which all watchers -+report; one per device to monitor the node's slot; and, optionally, one -+that handles the Pacemaker integration. -+ -+=over -+ -+=item B<-W> -+ -+Enable or disable use of the system watchdog to protect against the sbd -+processes failing and the node being left in an undefined state. Specify -+this once to enable, twice to disable. -+ -+Defaults to I. -+ -+=item B<-w> F -+ -+This can be used to override the default watchdog device used and should not -+usually be necessary. -+ -+=item B<-p> F -+ -+This option can be used to specify a pidfile for the main sbd process. -+ -+=item B<-F> I -+ -+Number of failures before a failing servant process will not be restarted -+immediately until the dampening delay has expired. If set to zero, servants -+will be restarted immediately and indefinitely. If set to one, a failed -+servant will be restarted once every B<-t> seconds. If set to a different -+value, the servant will be restarted that many times within the dampening -+period and then delay. -+ -+Defaults to I<1>. -+ -+=item B<-t> I -+ -+Dampening delay before faulty servants are restarted. Combined with C<-F 1>, -+the most logical way to tune the restart frequency of servant processes. -+Default is 5 seconds. -+ -+If set to zero, processes will be restarted indefinitely and immediately. -+ -+=item B<-P> -+ -+Enable Pacemaker integration which checks Pacemaker quorum and node health. -+Specify this once to enable, twice to disable. -+ -+Defaults to I. -+ -+=item B<-S> I -+ -+Set the start mode. (Defaults to I<0>.) -+ -+If this is set to zero, sbd will always start up unconditionally, -+regardless of whether the node was previously fenced or not. -+ -+If set to one, sbd will only start if the node was previously shutdown -+cleanly (as indicated by an exit request message in the slot), or if the -+slot is empty. A reset, crashdump, or power-off request in any slot will -+halt the start up. -+ -+This is useful to prevent nodes from rejoining if they were faulty. The -+node must be manually "unfenced" by sending an empty message to it: -+ -+ sbd -d /dev/sda1 message node1 clear -+ -+=item B<-s> I -+ -+Set the start-up wait time for devices. (Defaults to I<120>.) -+ -+Dynamic block devices such as iSCSI might not be fully initialized and -+present yet. This allows one to set a timeout for waiting for devices to -+appear on start-up. If set to 0, start-up will be aborted immediately if -+no devices are available. -+ -+=item B<-Z> -+ -+Enable trace mode. B Specifying this once will turn all reboots or power-offs, be -+they caused by self-fence decisions or messages, into a crashdump. -+Specifying this twice will just log them but not continue running. -+ -+=item B<-T> -+ -+By default, the daemon will set the watchdog timeout as specified in the -+device metadata. However, this does not work for every watchdog device. -+In this case, you must manually ensure that the watchdog timeout used by -+the system correctly matches the SBD settings, and then specify this -+option to allow C to continue with start-up. -+ -+=item B<-5> I -+ -+Warn if the time interval for tickling the watchdog exceeds this many seconds. -+Since the node is unable to log the watchdog expiry (it reboots immediately -+without a chance to write its logs to disk), this is very useful for getting -+an indication that the watchdog timeout is too short for the IO load of the -+system. -+ -+Default is 3 seconds, set to zero to disable. -+ -+=item B<-C> I -+ -+Watchdog timeout to set before crashdumping. If SBD is set to crashdump -+instead of reboot - either via the trace mode settings or the I -+fencing agent's parameter -, SBD will adjust the watchdog timeout to this -+setting before triggering the dump. Otherwise, the watchdog might trigger and -+prevent a successful crashdump from ever being written. -+ -+Set to zero (= default) to disable. -+ -+=item B<-r> I -+ -+Actions to be executed when the watchers don't timely report to the sbd -+master process or one of the watchers detects that the master process -+has died. -+ -+Set timeout-action to comma-separated combination of -+noflush|flush plus reboot|crashdump|off. -+If just one of both is given the other stays at the default. -+ -+This doesn't affect actions like off, crashdump, reboot explicitly -+triggered via message slots. -+And it does as well not configure the action a watchdog would -+trigger should it run off (there is no generic interface). -+ -+Defaults to flush,reboot. -+ -+=back -+ -+=head2 allocate -+ -+Example usage: -+ -+ sbd -d /dev/sda1 allocate node1 -+ -+Explicitly allocates a slot for the specified node name. This should -+rarely be necessary, as every node will automatically allocate itself a -+slot the first time it starts up on watch mode. -+ -+=head2 message -+ -+Example usage: -+ -+ sbd -d /dev/sda1 message node1 test -+ -+Writes the specified message to node's slot. This is rarely done -+directly, but rather abstracted via the C fencing agent -+configured as a cluster resource. -+ -+Supported message types are: -+ -+=over -+ -+=item test -+ -+This only generates a log message on the receiving node and can be used -+to check if SBD is seeing the device. Note that this could overwrite a -+fencing request send by the cluster, so should not be used during -+production. -+ -+=item reset -+ -+Reset the target upon receipt of this message. -+ -+=item off -+ -+Power-off the target. -+ -+=item crashdump -+ -+Cause the target node to crashdump. -+ -+=item exit -+ -+This will make the C daemon exit cleanly on the target. You should -+B send this message manually; this is handled properly during -+shutdown of the cluster stack. Manually stopping the daemon means the -+node is unprotected! -+ -+=item clear -+ -+This message indicates that no real message has been sent to the node. -+You should not set this manually; C will clear the message slot -+automatically during start-up, and setting this manually could overwrite -+a fencing message by the cluster. -+ -+=back -+ -+=head2 query-watchdog -+ -+Example usage: -+ -+ sbd query-watchdog -+ -+Check for available watchdog devices and print some info. -+ -+B: This command will arm the watchdog during query, and if your -+watchdog refuses disarming (for example, if its kernel module has the -+'nowayout' parameter set) this will reset your system. -+ -+=head2 test-watchdog -+ -+Example usage: -+ -+ sbd test-watchdog [-w /dev/watchdog3] -+ -+Test specified watchdog device (/dev/watchdog by default). -+ -+B: This command will arm the watchdog and have your system reset -+in case your watchdog is working properly! If issued from an interactive -+session, it will prompt for confirmation. -+ -+=head1 Base system configuration -+ -+=head2 Configure a watchdog -+ -+It is highly recommended that you configure your Linux system to load a -+watchdog driver with hardware assistance (as is available on most modern -+systems), such as I, I, or others. As a fall-back, you -+can use the I module. -+ -+No other software must access the watchdog timer; it can only be -+accessed by one process at any given time. Some hardware vendors ship -+systems management software that use the watchdog for system resets -+(f.e. HP ASR daemon). Such software has to be disabled if the watchdog -+is to be used by SBD. -+ -+=head2 Choosing and initializing the block device(s) -+ -+First, you have to decide if you want to use one, two, or three devices. -+ -+If you are using multiple ones, they should reside on independent -+storage setups. Putting all three of them on the same logical unit for -+example would not provide any additional redundancy. -+ -+The SBD device can be connected via Fibre Channel, Fibre Channel over -+Ethernet, or even iSCSI. Thus, an iSCSI target can become a sort-of -+network-based quorum server; the advantage is that it does not require -+a smart host at your third location, just block storage. -+ -+The SBD partitions themselves B be mirrored (via MD, -+DRBD, or the storage layer itself), since this could result in a -+split-mirror scenario. Nor can they reside on cLVM2 volume groups, since -+they must be accessed by the cluster stack before it has started the -+cLVM2 daemons; hence, these should be either raw partitions or logical -+units on (multipath) storage. -+ -+The block device(s) must be accessible from all nodes. (While it is not -+necessary that they share the same path name on all nodes, this is -+considered a very good idea.) -+ -+SBD will only use about one megabyte per device, so you can easily -+create a small partition, or very small logical units. (The size of the -+SBD device depends on the block size of the underlying device. Thus, 1MB -+is fine on plain SCSI devices and SAN storage with 512 byte blocks. On -+the IBM s390x architecture in particular, disks default to 4k blocks, -+and thus require roughly 4MB.) -+ -+The number of devices will affect the operation of SBD as follows: -+ -+=over -+ -+=item One device -+ -+In its most simple implementation, you use one device only. This is -+appropriate for clusters where all your data is on the same shared -+storage (with internal redundancy) anyway; the SBD device does not -+introduce an additional single point of failure then. -+ -+If the SBD device is not accessible, the daemon will fail to start and -+inhibit startup of cluster services. -+ -+=item Two devices -+ -+This configuration is a trade-off, primarily aimed at environments where -+host-based mirroring is used, but no third storage device is available. -+ -+SBD will not commit suicide if it loses access to one mirror leg; this -+allows the cluster to continue to function even in the face of one outage. -+ -+However, SBD will not fence the other side while only one mirror leg is -+available, since it does not have enough knowledge to detect an asymmetric -+split of the storage. So it will not be able to automatically tolerate a -+second failure while one of the storage arrays is down. (Though you -+can use the appropriate crm command to acknowledge the fence manually.) -+ -+It will not start unless both devices are accessible on boot. -+ -+=item Three devices -+ -+In this most reliable and recommended configuration, SBD will only -+self-fence if more than one device is lost; hence, this configuration is -+resilient against temporary single device outages (be it due to failures -+or maintenance). Fencing messages can still be successfully relayed if -+at least two devices remain accessible. -+ -+This configuration is appropriate for more complex scenarios where -+storage is not confined to a single array. For example, host-based -+mirroring solutions could have one SBD per mirror leg (not mirrored -+itself), and an additional tie-breaker on iSCSI. -+ -+It will only start if at least two devices are accessible on boot. -+ -+=back -+ -+After you have chosen the devices and created the appropriate partitions -+and perhaps multipath alias names to ease management, use the C -+command described above to initialize the SBD metadata on them. -+ -+=head3 Sharing the block device(s) between multiple clusters -+ -+It is possible to share the block devices between multiple clusters, -+provided the total number of nodes accessing them does not exceed I<255> -+nodes, and they all must share the same SBD timeouts (since these are -+part of the metadata). -+ -+If you are using multiple devices this can reduce the setup overhead -+required. However, you should B share devices between clusters in -+different security domains. -+ -+=head2 Configure SBD to start on boot -+ -+On systems using C, the C or C system -+start-up scripts must handle starting or stopping C as required -+before starting the rest of the cluster stack. -+ -+For C, sbd simply has to be enabled using -+ -+ systemctl enable sbd.service -+ -+The daemon is brought online on each node before corosync and Pacemaker -+are started, and terminated only after all other cluster components have -+been shut down - ensuring that cluster resources are never activated -+without SBD supervision. -+ -+=head2 Configuration via sysconfig -+ -+The system instance of C is configured via F. -+In this file, you must specify the device(s) used, as well as any -+options to pass to the daemon: -+ -+ SBD_DEVICE="/dev/sda1;/dev/sdb1;/dev/sdc1" -+ SBD_PACEMAKER="true" -+ -+C will fail to start if no C is specified. See the -+installed template or section for configuration via environment -+for more options that can be configured here. -+In general configuration done via parameters takes precedence over -+the configuration from the configuration file. -+ -+=head2 Configuration via environment -+ -+=over -+@environment_section@ -+=back -+ -+=head2 Testing the sbd installation -+ -+After a restart of the cluster stack on this node, you can now try -+sending a test message to it as root, from this or any other node: -+ -+ sbd -d /dev/sda1 message node1 test -+ -+The node will acknowledge the receipt of the message in the system logs: -+ -+ Aug 29 14:10:00 node1 sbd: [13412]: info: Received command test from node2 -+ -+This confirms that SBD is indeed up and running on the node, and that it -+is ready to receive messages. -+ -+Make B that F is identical on all cluster -+nodes, and that all cluster nodes are running the daemon. -+ -+=head1 Pacemaker CIB integration -+ -+=head2 Fencing resource -+ -+Pacemaker can only interact with SBD to issue a node fence if there is a -+configure fencing resource. This should be a primitive, not a clone, as -+follows: -+ -+ primitive fencing-sbd stonith:external/sbd \ -+ params pcmk_delay_max=30 -+ -+This will automatically use the same devices as configured in -+F. -+ -+While you should not configure this as a clone (as Pacemaker will register -+the fencing device on each node automatically), the I -+setting enables random fencing delay which ensures, in a scenario where a -+split-brain scenario did occur in a two node cluster, that one of the nodes -+has a better chance to survive to avoid double fencing. -+ -+SBD also supports turning the reset request into a crash request, which -+may be helpful for debugging if you have kernel crashdumping configured; -+then, every fence request will cause the node to dump core. You can -+enable this via the C parameter on the fencing -+resource. This is B recommended for production use, but only for -+debugging phases. -+ -+=head2 General cluster properties -+ -+You must also enable STONITH in general, and set the STONITH timeout to -+be at least twice the I timeout you have configured, to allow -+enough time for the fencing message to be delivered. If your I -+timeout is 60 seconds, this is a possible configuration: -+ -+ property stonith-enabled="true" -+ property stonith-timeout="120s" -+ -+B: if I is too low for I and the -+system overhead, sbd will never be able to successfully complete a fence -+request. This will create a fencing loop. -+ -+Note that the sbd fencing agent will try to detect this and -+automatically extend the I setting to a reasonable -+value, on the assumption that sbd modifying your configuration is -+preferable to not fencing. -+ -+=head1 Management tasks -+ -+=head2 Recovering from temporary SBD device outage -+ -+If you have multiple devices, failure of a single device is not immediately -+fatal. C will retry to restart the monitor for the device every 5 -+seconds by default. However, you can tune this via the options to the -+I command. -+ -+In case you wish the immediately force a restart of all currently -+disabled monitor processes, you can send a I to the SBD -+I process. -+ -+ -+=head1 LICENSE -+ -+Copyright (C) 2008-2013 Lars Marowsky-Bree -+ -+This program is free software; you can redistribute it and/or -+modify it under the terms of the GNU General Public -+License as published by the Free Software Foundation; either -+version 2 of the License, or (at your option) any later version. -+ -+This software is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+General Public License for more details. -+ -+For details see the GNU General Public License at -+http://www.gnu.org/licenses/gpl-2.0.html (version 2) and/or -+http://www.gnu.org/licenses/gpl.html (the newest as per "any later"). -diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig -index e1a60ed..33b50d0 100644 ---- a/src/sbd.sysconfig -+++ b/src/sbd.sysconfig -@@ -14,7 +14,7 @@ - # - SBD_PACEMAKER=yes - --## Type: list(always,clean) -+## Type: always / clean - ## Default: always - # - # Specify the start mode for sbd. Setting this to "clean" will only -@@ -103,6 +103,7 @@ SBD_TIMEOUT_ACTION=flush,reboot - # Thus in auto-mode sbd will check if the slice has RT-budget assigned. - # If that is the case sbd will stay in that slice while it will - # be moved to root-slice otherwise. -+# - SBD_MOVE_TO_ROOT_CGROUP=auto - - ## Type: string --- -1.8.3.1 - diff --git a/0003-Fix-scheduling-overhaul-the-whole-thing.patch b/0003-Fix-scheduling-overhaul-the-whole-thing.patch deleted file mode 100644 index 05fab9d..0000000 --- a/0003-Fix-scheduling-overhaul-the-whole-thing.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 4bc08cf76fc01e98cbec76bf32bb333b77f69217 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 27 Feb 2020 19:02:57 +0100 -Subject: [PATCH] Fix: scheduling: overhaul the whole thing - -- prevent possible lockup when format in proc changes -- properly get and handle scheduler policy & prio -- on SCHED_RR failing push to the max with SCHED_OTHER ---- - src/sbd-common.c | 56 ++++++++++++++++++++++++++++++++++++++++++++------------ - 1 file changed, 44 insertions(+), 12 deletions(-) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index 9ec43b2..c2da758 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -26,6 +26,9 @@ - #include - #include - #include -+#include -+#include -+#include - - #ifdef _POSIX_MEMLOCK - # include -@@ -298,7 +301,7 @@ watchdog_populate_list(void) - FILE *file; - - snprintf(entry_name, sizeof(entry_name), -- SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); -+ SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); - file = fopen(entry_name, "r"); - if (file) { - int major, minor; -@@ -667,7 +670,7 @@ static int get_realtime_budget(void) - { - FILE *f; - char fname[PATH_MAX]; -- int res = -1, lnum = 0; -+ int res = -1, lnum = 0, num; - char *cgroup = NULL, *namespecs = NULL; - - snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid()); -@@ -677,7 +680,8 @@ static int get_realtime_budget(void) - (intmax_t)getpid()); - goto exit_res; - } -- while( fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum, &namespecs, &cgroup) !=EOF ) { -+ while( (num = fscanf(f, "%d:%m[^:]:%m[^\n]\n", &lnum, -+ &namespecs, &cgroup)) !=EOF ) { - if (namespecs && strstr(namespecs, "cpuacct")) { - free(namespecs); - break; -@@ -690,6 +694,11 @@ static int get_realtime_budget(void) - free(namespecs); - namespecs = NULL; - } -+ /* not to get stuck if format changes */ -+ if ((num < 3) && ((fscanf(f, "%*[^\n]") == EOF) || -+ (fscanf(f, "\n") == EOF))) { -+ break; -+ } - } - fclose(f); - if (cgroup == NULL) { -@@ -776,15 +785,17 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) - return; - } - -+do { - #ifdef SCHED_RR - if (move_to_root_cgroup) { - sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup); - } - - { -- int pcurrent = 0; - int pmin = sched_get_priority_min(SCHED_RR); - int pmax = sched_get_priority_max(SCHED_RR); -+ struct sched_param sp; -+ int pcurrent; - - if (priority == 0) { - priority = pmax; -@@ -794,26 +805,47 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) - priority = pmax; - } - -- pcurrent = sched_getscheduler(0); -- if (pcurrent < 0) { -+ if (sched_getparam(0, &sp) < 0) { - cl_perror("Unable to get scheduler priority"); - -- } else if(pcurrent < priority) { -- struct sched_param sp; -+ } else if ((pcurrent = sched_getscheduler(0)) < 0) { -+ cl_perror("Unable to get scheduler policy"); - -+ } else if ((pcurrent == SCHED_RR) && -+ (sp.sched_priority >= priority)) { -+ cl_log(LOG_INFO, -+ "Stay with priority (%d) for policy SCHED_RR", -+ sp.sched_priority); -+ break; -+ } else { - memset(&sp, 0, sizeof(sp)); - sp.sched_priority = priority; - - if (sched_setscheduler(0, SCHED_RR, &sp) < 0) { -- cl_perror("Unable to set scheduler priority to %d", priority); -+ cl_perror( -+ "Unable to set scheduler policy to SCHED_RR priority %d", -+ priority); - } else { -- cl_log(LOG_INFO, "Scheduler priority is now %d", priority); -+ cl_log(LOG_INFO, -+ "Scheduler policy is now SCHED_RR priority %d", -+ priority); -+ break; - } - } - } - #else -- cl_log(LOG_ERR, "System does not support updating the scheduler priority"); -+ cl_log(LOG_ERR, "System does not support updating the scheduler policy"); -+#endif -+#ifdef PRIO_PGRP -+ if (setpriority(PRIO_PGRP, 0, INT_MIN) < 0) { -+ cl_perror("Unable to raise the scheduler priority"); -+ } else { -+ cl_log(LOG_INFO, "Scheduler priority raised to the maximum"); -+ } -+#else -+ cl_perror("System does not support setting the scheduler priority"); - #endif -+} while (0); - - sbd_memlock(heapgrowK, stackgrowK); - } -@@ -826,7 +858,7 @@ maximize_priority(void) - return; - } - -- sbd_make_realtime(0, 256, 256); -+ sbd_make_realtime(0, 256, 256); - - if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), - IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 1)) != 0) { --- -1.8.3.1 - diff --git a/sbd.spec b/sbd.spec index 61b4e8a..45e159a 100644 --- a/sbd.spec +++ b/sbd.spec @@ -15,21 +15,28 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # -%global commit 25fce8a7d5e8cd5abc2379077381b10bd6cec183 -%global shortcommit %(c=%{commit}; echo ${c:0:7}) +%global longcommit 044643922c3044c0bac2f91b1382ff3f2c64a4a2 +%global shortcommit %(echo %{longcommit}|cut -c1-8) %global github_owner Clusterlabs -%global buildnum 7 +%global buildnum 1 + +%ifarch s390x s390 +# minimum timeout on LPAR diag288 watchdog is 15s +%global watchdog_timeout_default 15 +%else +%global watchdog_timeout_default 5 +%endif + +%global sync_resource_startup_default no +%global sync_resource_startup_sysconfig yes Name: sbd Summary: Storage-based death License: GPLv2+ -Version: 1.4.1 -Release: %{buildnum}%{?dist}.1 +Version: 1.4.2 +Release: %{buildnum}%{?dist} Url: https://github.com/%{github_owner}/%{name} -Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz -Patch0: 0001-Fix-regressions.sh-make-parameter-passing-consistent.patch -Patch1: 0002-Doc-add-environment-section-to-man-page.patch -Patch2: 0003-Fix-scheduling-overhaul-the-whole-thing.patch +Source0: https://github.com/%{github_owner}/%{name}/archive/%{longcommit}/%{name}-%{longcommit}.tar.gz BuildRequires: autoconf BuildRequires: automake BuildRequires: libuuid-devel @@ -44,6 +51,7 @@ BuildRequires: pkgconfig BuildRequires: make BuildRequires: systemd Conflicts: fence-agents-sbd < 4.5.0 +Conflicts: pacemaker-libs < 2.0.5-10 %if 0%{?rhel} ExclusiveArch: i686 x86_64 s390x aarch64 ppc64le @@ -68,18 +76,16 @@ regression-testing sbd. ########################################################### %prep -%autosetup -n %{name}-%{commit} -p1 -%ifarch s390x s390 -sed -i src/sbd.sysconfig -e "s/Default: 5/Default: 15/" -sed -i src/sbd.sysconfig -e "s/SBD_WATCHDOG_TIMEOUT=5/SBD_WATCHDOG_TIMEOUT=15/" -%endif +%autosetup -n %{name}-%{longcommit} -p1 ########################################################### %build ./autogen.sh export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror" -%configure +%configure --with-watchdog-timeout-default=%{watchdog_timeout_default} \ + --with-sync-resource-startup-default=%{sync_resource_startup_default} \ + --with-sync-resource-startup-sysconfig=%{sync_resource_startup_sysconfig} make %{?_smp_mflags} ########################################################### @@ -133,6 +139,8 @@ fi %defattr(-,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/sbd %{_sbindir}/sbd +%{_datadir}/sbd +%{_datadir}/pkgconfig/sbd.pc %exclude %{_datadir}/sbd/regressions.sh %doc %{_mandir}/man8/sbd* %if %{defined _unitdir} @@ -149,6 +157,12 @@ fi %{_libdir}/libsbdtestbed* %changelog +* Mon Dec 7 2020 Klaus Wenninger - 1.4.2-1 +- Rebase to upstream v1.4.2 +- Conflict with pacemaker that doesn't have pacemakerd-api +- Set default to not do startup/shutdown-syncing via pacemakerd-api + but enable in template-config + * Wed Jul 29 2020 Fedora Release Engineering - 1.4.1-7.1 - Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild diff --git a/sources b/sources index 50b7de1..943a046 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (sbd-25fce8a7d5e8cd5abc2379077381b10bd6cec183.tar.gz) = 3b89ee0aa88282f17c8daf725a1e7a8c2f2affdcf6ff6f4ca4faf250760d778a65c5693e5df3fcc7554d60dd9b0cb1a0350e266fadb7668320f3c676d8799a29 +SHA512 (sbd-044643922c3044c0bac2f91b1382ff3f2c64a4a2.tar.gz) = de8b4c06b6382a56102af85b7b9f295ccf2ee5118213c4e95c4d314b681d2dcfa58702cd1ad7de667283508d69a36499bb767f4594745883c6fca0c54e365216