From 8c61f2019d11781b737251b5cf839437b25fc53f Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 25 Jul 2018 23:15:10 +0200 Subject: [PATCH 1/3] CTDB: fix incorrect db corruption reports (bsc#1101668) If a database was disconnected during an active transaction, then tdbdump may fail with e.g.: > /usr/bin/tdbdump /var/lib/ctdb/persistent/secrets.tdb.1 Failed to open /var/lib/ctdb/persistent/secrets.tdb.1 tdb(/var/lib/ctdb/persistent/secrets.tdb.1): FATAL: tdb_transaction_recover: attempt to recover read only database This does *not* indicate corruption, only that tdbdump, which opens the database readonly, isn't able to perform recovery. Using tdbtool check, instead of tdbdump, passes: > tdbtool /var/lib/ctdb/persistent/secrets.tdb.1 check tdb_transaction_recover: recovered 2146304 byte database Database integrity is OK and has 2 records. Drop the tdbdump checks, and instead rely on the core ctdb event script, which performs the same checks with tdbtool. Signed-off-by: David Disseldorp --- heartbeat/CTDB.in | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in index 1456ea32b..28e58cea0 100755 --- a/heartbeat/CTDB.in +++ b/heartbeat/CTDB.in @@ -392,6 +392,8 @@ enable_event_scripts() { local event_dir event_dir=$OCF_RESKEY_ctdb_config_dir/events.d + chmod u+x "$event_dir/00.ctdb" # core database health check + if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then chmod u+x "$event_dir/10.interface" else @@ -563,17 +565,6 @@ ctdb_start() { rv=$? [ $rv -ne 0 ] && return $rv - # Die if databases are corrupted - persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent" - mkdir -p $persistent_db_dir 2>/dev/null - for pdbase in $persistent_db_dir/*.tdb.[0-9]; do - [ -f "$pdbase" ] || break - /usr/bin/tdbdump "$pdbase" >/dev/null 2>/dev/null || { - ocf_exit_reason "Persistent database $pdbase is corrupted! CTDB will not start." - return $OCF_ERR_GENERIC - } - done - # Add necessary configuration to smb.conf init_smb_conf if [ $? -ne 0 ]; then @@ -737,9 +728,8 @@ ctdb_monitor() { ctdb_validate() { - # Required binaries (full path to tdbdump is intentional, as that's - # what's used in ctdb_start, which was lifted from the init script) - for binary in pkill /usr/bin/tdbdump; do + # Required binaries + for binary in pkill; do check_binary $binary done From 1ff4ce7cbe58b5309f00ac1bbe124c562b6dcaf6 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 27 Jul 2018 16:02:26 +0200 Subject: [PATCH 2/3] CTDB: explicitly use bash shell Upcoming recovery lock substring processing is bash specific. Signed-off-by: David Disseldorp --- configure.ac | 1 + heartbeat/CTDB.in | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in index 7d87a4ef7..f9b5c564f 100755 --- a/heartbeat/CTDB.in +++ b/heartbeat/CTDB.in @@ -134,8 +134,8 @@ For more information see http://linux-ha.org/wiki/CTDB_(resource_agent) -The location of a shared lock file, common across all nodes. -This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock +The location of a shared lock file or helper binary, common across all nodes. +See CTDB documentation for details. CTDB shared lock file @@ -757,13 +757,24 @@ ctdb_validate() { return $OCF_ERR_CONFIGURED fi - lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") - touch "$lock_dir/$$" 2>/dev/null - if [ $? != 0 ]; then - ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." - return $OCF_ERR_ARGS + if [ "${OCF_RESKEY_ctdb_recovery_lock:0:1}" == '!' ]; then + # '!' prefix means recovery lock is handled via a helper binary + binary="${OCF_RESKEY_ctdb_recovery_lock:1}" + binary="${binary%% *}" # trim any parameters + if [ -z "$binary" ]; then + ocf_exit_reason "ctdb_recovery_lock invalid helper" + return $OCF_ERR_CONFIGURED + fi + check_binary "${binary}" + else + lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock") + touch "$lock_dir/$$" 2>/dev/null + if [ $? != 0 ]; then + ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable." + return $OCF_ERR_ARGS + fi + rm "$lock_dir/$$" fi - rm "$lock_dir/$$" return $OCF_SUCCESS }