kexec-tools/kdumpctl

1407 lines
33 KiB
Bash
Executable File

#!/bin/bash
KEXEC=/sbin/kexec
KDUMP_KERNELVER=""
KDUMP_KERNEL=""
KDUMP_COMMANDLINE=""
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
KDUMP_LOG_PATH="/var/log"
MKDUMPRD="/sbin/mkdumprd -f"
MKFADUMPRD="/sbin/mkfadumprd"
DRACUT_MODULES_FILE="/usr/lib/dracut/modules.txt"
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
INITRD_CHECKSUM_LOCATION="/boot/.fadump_initrd_checksum"
DUMP_TARGET=""
DEFAULT_INITRD=""
DEFAULT_INITRD_BAK=""
KDUMP_INITRD=""
TARGET_INITRD=""
FADUMP_REGISTER_SYS_NODE="/sys/kernel/fadump_registered"
#kdump shall be the default dump mode
DEFAULT_DUMP_MODE="kdump"
image_time=0
standard_kexec_args="-d -p"
# Some default values in case /etc/sysconfig/kdump doesn't include
KDUMP_COMMANDLINE_REMOVE="hugepages hugepagesz slub_debug"
if [ -f /etc/sysconfig/kdump ]; then
. /etc/sysconfig/kdump
fi
[[ $dracutbasedir ]] || dracutbasedir=/usr/lib/dracut
. $dracutbasedir/dracut-functions.sh
. /lib/kdump/kdump-lib.sh
. /lib/kdump/kdump-logger.sh
#initiate the kdump logger
dlog_init
if [ $? -ne 0 ]; then
echo "failed to initiate the kdump logger."
exit 1
fi
single_instance_lock()
{
local rc timeout=5
exec 9>/var/lock/kdump
if [ $? -ne 0 ]; then
derror "Create file lock failed"
exit 1
fi
flock -n 9
rc=$?
while [ $rc -ne 0 ]; do
dinfo "Another app is currently holding the kdump lock; waiting for it to exit..."
flock -w $timeout 9
rc=$?
done
}
determine_dump_mode()
{
# Check if firmware-assisted dump is enabled
# if yes, set the dump mode as fadump
if is_fadump_capable; then
dinfo "Dump mode is fadump"
DEFAULT_DUMP_MODE="fadump"
fi
ddebug "DEFAULT_DUMP_MODE=$DEFAULT_DUMP_MODE"
}
save_core()
{
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
mkdir -p $coredir
ddebug "cp --sparse=always /proc/vmcore $coredir/vmcore-incomplete"
cp --sparse=always /proc/vmcore $coredir/vmcore-incomplete
if [ $? == 0 ]; then
mv $coredir/vmcore-incomplete $coredir/vmcore
dinfo "saved a vmcore to $coredir"
else
derror "failed to save a vmcore to $coredir"
fi
# pass the dmesg to Abrt tool if exists, in order
# to collect the kernel oops message.
# https://fedorahosted.org/abrt/
if [ -x /usr/bin/dumpoops ]; then
ddebug "makedumpfile --dump-dmesg $coredir/vmcore $coredir/dmesg"
makedumpfile --dump-dmesg $coredir/vmcore $coredir/dmesg >/dev/null 2>&1
ddebug "dumpoops -d $coredir/dmesg"
dumpoops -d $coredir/dmesg >/dev/null 2>&1
if [ $? == 0 ]; then
dinfo "kernel oops has been collected by abrt tool"
fi
fi
}
rebuild_fadump_initrd()
{
if ! $MKFADUMPRD "$DEFAULT_INITRD_BAK" "$TARGET_INITRD" --kver "$KDUMP_KERNELVER"; then
derror "mkfadumprd: failed to make fadump initrd"
return 1
fi
sync -f "$TARGET_INITRD"
return 0
}
check_earlykdump_is_enabled()
{
grep -q -w "rd.earlykdump" /proc/cmdline
return $?
}
rebuild_kdump_initrd()
{
ddebug "rebuild kdump initrd: $MKDUMPRD $TARGET_INITRD $KDUMP_KERNELVER"
$MKDUMPRD $TARGET_INITRD $KDUMP_KERNELVER
if [ $? != 0 ]; then
derror "mkdumprd: failed to make kdump initrd"
return 1
fi
if check_earlykdump_is_enabled; then
dwarn "Tips: If early kdump is enabled, also require rebuilding the system initramfs to make the changes take effect for early kdump."
fi
sync -f "$TARGET_INITRD"
return 0
}
rebuild_initrd()
{
if [[ ! -w $(dirname $TARGET_INITRD) ]];then
derror "$(dirname $TARGET_INITRD) does not have write permission. Cannot rebuild $TARGET_INITRD"
return 1
fi
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
rebuild_fadump_initrd
else
rebuild_kdump_initrd
fi
return $?
}
#$1: the files to be checked with IFS=' '
check_exist()
{
for file in $1; do
if [ ! -e "$file" ]; then
derror "Error: $file not found."
return 1
fi
done
}
#$1: the files to be checked with IFS=' '
check_executable()
{
for file in $1; do
if [ ! -x "$file" ]; then
derror "Error: $file is not executable."
return 1
fi
done
}
backup_default_initrd()
{
ddebug "backup default initrd: $DEFAULT_INITRD"
if [ ! -f "$DEFAULT_INITRD" ]; then
return
fi
if [ ! -e $DEFAULT_INITRD_BAK ]; then
dinfo "Backing up $DEFAULT_INITRD before rebuild."
# save checksum to verify before restoring
sha1sum $DEFAULT_INITRD > $INITRD_CHECKSUM_LOCATION
cp $DEFAULT_INITRD $DEFAULT_INITRD_BAK
if [ $? -ne 0 ]; then
dwarn "WARNING: failed to backup $DEFAULT_INITRD."
rm -f $DEFAULT_INITRD_BAK
fi
fi
}
restore_default_initrd()
{
ddebug "restore default initrd: $DEFAULT_INITRD"
if [ ! -f "$DEFAULT_INITRD" ]; then
return
fi
# If a backup initrd exists, we must be switching back from
# fadump to kdump. Restore the original default initrd.
if [ -f $DEFAULT_INITRD_BAK ] && [ -f $INITRD_CHECKSUM_LOCATION ]; then
# verify checksum before restoring
backup_checksum=`sha1sum $DEFAULT_INITRD_BAK | awk '{ print $1 }'`
default_checksum=`cat $INITRD_CHECKSUM_LOCATION | awk '{ print $1 }'`
if [ "$default_checksum" != "$backup_checksum" ]; then
dwarn "WARNING: checksum mismatch! Can't restore original initrd.."
else
rm -f $INITRD_CHECKSUM_LOCATION
mv $DEFAULT_INITRD_BAK $DEFAULT_INITRD
if [[ $? -eq 0 ]]; then
derror "Restoring original initrd as fadump mode is disabled."
sync -f "DEFAULT_INITRD"
fi
fi
fi
}
check_config()
{
local -A _opt_rec
while read -r config_opt config_val; do
case "$config_opt" in
dracut_args)
if [[ $config_val == *--mount* ]]; then
if [ $(echo $config_val | grep -o "\-\-mount" | wc -l) -ne 1 ]; then
derror "Multiple mount targets specified in one \"dracut_args\"."
return 1
fi
config_opt=_target
fi
;;
raw)
if [ -d "/proc/device-tree/ibm,opal/dump" ]; then
derror "WARNING: Won't capture opalcore when 'raw' dump target is used."
return 1
fi
config_opt=_target
;;
ext[234]|minix|btrfs|xfs|nfs|ssh)
config_opt=_target
;;
sshkey|path|core_collector|kdump_post|kdump_pre|extra_bins|extra_modules|failure_action|default|final_action|force_rebuild|force_no_rebuild|fence_kdump_args|fence_kdump_nodes)
;;
net|options|link_delay|disk_timeout|debug_mem_level|blacklist)
derror "Deprecated kdump config option: $config_opt. Refer to kdump.conf manpage for alternatives."
return 1
;;
'')
continue
;;
*)
derror "Invalid kdump config option $config_opt"
return 1
;;
esac
if [[ -z "$config_val" ]]; then
derror "Invalid kdump config value for option '$config_opt'"
return 1
fi
if [ -n "${_opt_rec[$config_opt]}" ]; then
if [ $config_opt == _target ]; then
derror "More than one dump targets specified"
else
derror "Duplicated kdump config value of option $config_opt"
fi
return 1
fi
_opt_rec[$config_opt]="$config_val"
done <<< "$(read_strip_comments $KDUMP_CONFIG_FILE)"
check_failure_action_config || return 1
check_final_action_config || return 1
check_fence_kdump_config || return 1
return 0
}
# get_pcs_cluster_modified_files <image timestamp>
# return list of modified file for fence_kdump modified in Pacemaker cluster
get_pcs_cluster_modified_files()
{
local time_stamp
local modified_files
is_generic_fence_kdump && return 1
is_pcs_fence_kdump || return 1
time_stamp=`pcs cluster cib | xmllint --xpath 'string(/cib/@cib-last-written)' - | \
xargs -0 date +%s --date`
if [ -n $time_stamp -a $time_stamp -gt $image_time ]; then
modified_files="cluster-cib"
fi
if [ -f $FENCE_KDUMP_CONFIG_FILE ]; then
time_stamp=`stat -c "%Y" $FENCE_KDUMP_CONFIG_FILE`
if [ "$time_stamp" -gt "$image_time" ]; then
modified_files="$modified_files $FENCE_KDUMP_CONFIG_FILE"
fi
fi
echo $modified_files
}
setup_initrd()
{
prepare_kdump_bootinfo
if [ $? -ne 0 ]; then
derror "failed to prepare for kdump bootinfo."
return 1
fi
DEFAULT_INITRD_BAK="$KDUMP_BOOTDIR/.$(basename $DEFAULT_INITRD).default"
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
TARGET_INITRD="$DEFAULT_INITRD"
if [ ! -s "$TARGET_INITRD" ]; then
derror "Error: No initrd found to rebuild!"
return 1
fi
# backup initrd for reference before replacing it
# with fadump aware initrd
backup_default_initrd
else
TARGET_INITRD="$KDUMP_INITRD"
# check if a backup of default initrd exists. If yes,
# it signifies a switch from fadump mode. So, restore
# the backed up default initrd.
restore_default_initrd
fi
}
check_files_modified()
{
local modified_files=""
#also rebuild when Pacemaker cluster conf is changed and fence kdump is enabled.
modified_files=$(get_pcs_cluster_modified_files)
EXTRA_BINS=`grep ^kdump_post $KDUMP_CONFIG_FILE | cut -d\ -f2`
CHECK_FILES=`grep ^kdump_pre $KDUMP_CONFIG_FILE | cut -d\ -f2`
HOOKS="/etc/kdump/post.d/ /etc/kdump/pre.d/"
if [ -d /etc/kdump/post.d ]; then
for file in /etc/kdump/post.d/*; do
if [ -x "$file" ]; then
POST_FILES="$POST_FILES $file"
fi
done
fi
if [ -d /etc/kdump/pre.d ]; then
for file in /etc/kdump/pre.d/*; do
if [ -x "$file" ]; then
PRE_FILES="$PRE_FILES $file"
fi
done
fi
HOOKS="$HOOKS $POST_FILES $PRE_FILES"
CORE_COLLECTOR=`grep ^core_collector $KDUMP_CONFIG_FILE | cut -d\ -f2`
CORE_COLLECTOR=`type -P $CORE_COLLECTOR`
# POST_FILES and PRE_FILES are already checked against executable, need not to check again.
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
CHECK_FILES=`grep ^extra_bins $KDUMP_CONFIG_FILE | cut -d\ -f2-`
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
files="$KDUMP_CONFIG_FILE $KDUMP_KERNEL $EXTRA_BINS $CORE_COLLECTOR"
[[ -e /etc/fstab ]] && files="$files /etc/fstab"
# Check for any updated extra module
EXTRA_MODULES="$(grep ^extra_modules $KDUMP_CONFIG_FILE | sed 's/^extra_modules\s*//')"
if [ -n "$EXTRA_MODULES" ]; then
if [ -e /lib/modules/$KDUMP_KERNELVER/modules.dep ]; then
files="$files /lib/modules/$KDUMP_KERNELVER/modules.dep"
fi
for _module in $EXTRA_MODULES; do
_module_file="$(modinfo --set-version "$KDUMP_KERNELVER" --filename "$_module" 2>/dev/null)"
if [[ $? -eq 0 ]]; then
files="$files $_module_file"
for _dep_modules in $(modinfo -F depends $_module | tr ',' ' '); do
files="$files $(modinfo --set-version "$KDUMP_KERNELVER" --filename $_dep_modules 2>/dev/null)"
done
else
# If it's not a module nor builtin, give an error
if ! ( modprobe --set-version "$KDUMP_KERNELVER" --dry-run "$_module" &>/dev/null ); then
dwarn "Module $_module not found"
fi
fi
done
fi
# HOOKS is mandatory and need to check the modification time
files="$files $HOOKS"
check_exist "$files" && check_executable "$EXTRA_BINS"
[ $? -ne 0 ] && return 2
for file in $files; do
if [ -e "$file" ]; then
time_stamp=`stat -c "%Y" $file`
if [ "$time_stamp" -gt "$image_time" ]; then
modified_files="$modified_files $file"
fi
if [ -L "$file" ]; then
file=$(readlink -m $file)
time_stamp=`stat -c "%Y" $file`
if [ "$time_stamp" -gt "$image_time" ]; then
modified_files="$modified_files $file"
fi
fi
else
dwarn "$file doesn't exist"
fi
done
if [ -n "$modified_files" ]; then
dinfo "Detected change(s) in the following file(s): $modified_files"
return 1
fi
return 0
}
check_drivers_modified()
{
local _target _new_drivers _old_drivers _module_name _module_filename
# If it's dump target is on block device, detect the block driver
_target=$(get_block_dump_target)
if [[ -n "$_target" ]]; then
_record_block_drivers() {
local _drivers
_drivers=$(udevadm info -a "/dev/block/$1" | sed -n 's/\s*DRIVERS=="\(\S\+\)"/\1/p')
for _driver in $_drivers; do
if ! [[ " $_new_drivers " == *" $_driver "* ]]; then
_new_drivers="$_new_drivers $_driver"
fi
done
ddebug "MAJ:MIN=$1 drivers='$_drivers'"
}
check_block_and_slaves_all _record_block_drivers "$(get_maj_min "$_target")"
fi
# Include watchdog drivers if watchdog module is not omitted
is_dracut_mod_omitted watchdog || is_dracut_mod_omitted watchdog-modules || _new_drivers+=" $(get_watchdog_drvs)"
[ -z "$_new_drivers" ] && return 0
if is_fadump_capable; then
_old_drivers="$(lsinitrd "$TARGET_INITRD" -f /usr/lib/dracut/fadump-kernel-modules.txt | tr '\n' ' ')"
else
_old_drivers="$(lsinitrd "$TARGET_INITRD" -f /usr/lib/dracut/loaded-kernel-modules.txt | tr '\n' ' ')"
fi
ddebug "Modules required for kdump: '$_new_drivers'"
ddebug "Modules included in old initramfs: '$_old_drivers'"
for _driver in $_new_drivers; do
# Skip deprecated/invalid driver name or built-in module
_module_name=$(modinfo --set-version "$KDUMP_KERNELVER" -F name $_driver 2>/dev/null)
_module_filename=$(modinfo --set-version "$KDUMP_KERNELVER" -n $_driver 2>/dev/null)
if [ $? -ne 0 ] || [ -z "$_module_name" ] || [[ "$_module_filename" = *"(builtin)"* ]]; then
continue
fi
if ! [[ " $_old_drivers " == *" $_module_name "* ]]; then
dinfo "Detected change in block device driver, new loaded module: $_module_name"
return 1
fi
done
}
check_fs_modified()
{
local _old_dev _old_mntpoint _old_fstype
local _new_dev _new_mntpoint _new_fstype
local _target _dracut_args
# No need to check in case of mount target specified via "dracut_args".
if is_mount_in_dracut_args; then
return 0
fi
# No need to check in case of raw target.
# Currently we do not check also if ssh/nfs target is specified
if is_ssh_dump_target || is_nfs_dump_target || is_raw_dump_target; then
return 0
fi
_target=$(get_block_dump_target)
_new_fstype=$(get_fs_type_from_target $_target)
if [[ -z "$_target" ]] || [[ -z "$_new_fstype" ]];then
derror "Dump target is invalid"
return 2
fi
ddebug "_target=$_target _new_fstype=$_new_fstype"
_new_dev=$(kdump_get_persistent_dev $_target)
if [ -z "$_new_dev" ]; then
perror "Get persistent device name failed"
return 2
fi
_new_mntpoint="$(get_kdump_mntpoint_from_target $_target)"
_dracut_args=$(lsinitrd $TARGET_INITRD -f usr/lib/dracut/build-parameter.txt)
if [[ -z "$_dracut_args" ]];then
dwarn "Warning: No dracut arguments found in initrd"
return 0
fi
# if --mount argument present then match old and new target, mount
# point and file system. If any of them mismatches then rebuild
echo $_dracut_args | grep "\-\-mount" &> /dev/null
if [[ $? -eq 0 ]];then
set -- $(echo $_dracut_args | awk -F "--mount '" '{print $2}' | cut -d' ' -f1,2,3)
_old_dev=$1
_old_mntpoint=$2
_old_fstype=$3
[[ $_new_dev = $_old_dev && $_new_mntpoint = $_old_mntpoint && $_new_fstype = $_old_fstype ]] && return 0
# otherwise rebuild if target device is not a root device
else
[[ "$_target" = "$(get_root_fs_device)" ]] && return 0
fi
dinfo "Detected change in File System"
return 1
}
# returns 0 if system is not modified
# returns 1 if system is modified
# returns 2 if system modification is invalid
check_system_modified()
{
local ret
[[ -f $TARGET_INITRD ]] || return 1
check_files_modified
ret=$?
if [ $ret -ne 0 ]; then
return $ret
fi
check_fs_modified
ret=$?
if [ $ret -ne 0 ]; then
return $ret
fi
check_drivers_modified
ret=$?
if [ $ret -ne 0 ]; then
return $ret
fi
return 0
}
check_rebuild()
{
local capture_capable_initrd="1"
local _force_rebuild force_rebuild="0"
local _force_no_rebuild force_no_rebuild="0"
local ret system_modified="0"
setup_initrd
if [ $? -ne 0 ]; then
return 1
fi
_force_no_rebuild=`grep ^force_no_rebuild $KDUMP_CONFIG_FILE 2>/dev/null`
if [ $? -eq 0 ]; then
force_no_rebuild=`echo $_force_no_rebuild | cut -d' ' -f2`
if [ "$force_no_rebuild" != "0" ] && [ "$force_no_rebuild" != "1" ];then
derror "Error: force_no_rebuild value is invalid"
return 1
fi
fi
_force_rebuild=`grep ^force_rebuild $KDUMP_CONFIG_FILE 2>/dev/null`
if [ $? -eq 0 ]; then
force_rebuild=`echo $_force_rebuild | cut -d' ' -f2`
if [ "$force_rebuild" != "0" ] && [ "$force_rebuild" != "1" ];then
derror "Error: force_rebuild value is invalid"
return 1
fi
fi
if [[ "$force_no_rebuild" == "1" && "$force_rebuild" == "1" ]]; then
derror "Error: force_rebuild and force_no_rebuild are enabled simultaneously in kdump.conf"
return 1
fi
# Will not rebuild kdump initrd
if [ "$force_no_rebuild" == "1" ]; then
return 0
fi
#check to see if dependent files has been modified
#since last build of the image file
if [ -f $TARGET_INITRD ]; then
image_time=`stat -c "%Y" $TARGET_INITRD 2>/dev/null`
#in case of fadump mode, check whether the default/target
#initrd is already built with dump capture capability
if [ "$DEFAULT_DUMP_MODE" == "fadump" ]; then
capture_capable_initrd=$(lsinitrd -f $DRACUT_MODULES_FILE $TARGET_INITRD | grep -e ^kdumpbase$ -e ^zz-fadumpinit$ | wc -l)
fi
fi
check_system_modified
ret=$?
if [ $ret -eq 2 ]; then
return 1
elif [ $ret -eq 1 ];then
system_modified="1"
fi
if [ $image_time -eq 0 ]; then
dinfo "No kdump initial ramdisk found."
elif [ "$capture_capable_initrd" == "0" ]; then
dinfo "Rebuild $TARGET_INITRD with dump capture support"
elif [ "$force_rebuild" != "0" ]; then
dinfo "Force rebuild $TARGET_INITRD"
elif [ "$system_modified" != "0" ]; then
:
else
return 0
fi
dinfo "Rebuilding $TARGET_INITRD"
rebuild_initrd
return $?
}
# On ppc64le LPARs, the keys trusted by firmware do not end up in
# .builtin_trusted_keys. So instead, add the key to the .ima keyring
function load_kdump_kernel_key()
{
# this is only called inside is_secure_boot_enforced,
# no need to retest
# this is only required if DT /ibm,secure-boot is a file.
# if it is a dir, we are on OpenPower and don't need this.
if ! [ -f /proc/device-tree/ibm,secure-boot ]; then
return
fi
KDUMP_KEY_ID=$(cat /usr/share/doc/kernel-keys/$KDUMP_KERNELVER/kernel-signing-ppc.cer |
keyctl padd asymmetric kernelkey-$RANDOM %:.ima)
}
# remove a previously loaded key. There's no real security implication
# to leaving it around, we choose to do this because it makes it easier
# to be idempotent and so as to reduce the potential for confusion.
function remove_kdump_kernel_key()
{
if [ -z "$KDUMP_KEY_ID" ]; then
return
fi
keyctl unlink $KDUMP_KEY_ID %:.ima
}
# Load the kdump kernel specified in /etc/sysconfig/kdump
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
load_kdump()
{
local ret
KEXEC_ARGS=$(prepare_kexec_args "${KEXEC_ARGS}")
KDUMP_COMMANDLINE=$(prepare_cmdline "${KDUMP_COMMANDLINE}" "${KDUMP_COMMANDLINE_REMOVE}" "${KDUMP_COMMANDLINE_APPEND}")
# For secureboot enabled machines, use new kexec file based syscall.
# Old syscall will always fail as it does not have capability to
# to kernel signature verification.
if is_secure_boot_enforced; then
dinfo "Secure Boot is enabled. Using kexec file based syscall."
KEXEC_ARGS="$KEXEC_ARGS -s"
load_kdump_kernel_key
fi
ddebug "$KEXEC $KEXEC_ARGS $standard_kexec_args --command-line=$KDUMP_COMMANDLINE --initrd=$TARGET_INITRD $KDUMP_KERNEL"
# The '12' represents an intermediate temporary file descriptor
# to store the standard error file descriptor '2', and later
# restore the error file descriptor with the file descriptor '12'
# and release it.
exec 12>&2
exec 2>> $KDUMP_LOG_PATH/kdump.log
PS4='+ $(date "+%Y-%m-%d %H:%M:%S") ${BASH_SOURCE}@${LINENO}: '
set -x
$KEXEC $KEXEC_ARGS $standard_kexec_args \
--command-line="$KDUMP_COMMANDLINE" \
--initrd=$TARGET_INITRD $KDUMP_KERNEL
ret=$?
set +x
exec 2>&12 12>&-
remove_kdump_kernel_key
if [ $ret == 0 ]; then
dinfo "kexec: loaded kdump kernel"
return 0
else
derror "kexec: failed to load kdump kernel"
return 1
fi
}
check_ssh_config()
{
while read config_opt config_val; do
case "$config_opt" in
sshkey)
# remove inline comments after the end of a directive.
if [ -f "$config_val" ]; then
# canonicalize the path
SSH_KEY_LOCATION=$(/usr/bin/readlink -m $config_val)
else
dwarn "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
fi
;;
path)
SAVE_PATH=$config_val
;;
ssh)
DUMP_TARGET=$config_val
;;
*)
;;
esac
done <<< "$(read_strip_comments $KDUMP_CONFIG_FILE)"
#make sure they've configured kdump.conf for ssh dumps
local SSH_TARGET=`echo -n $DUMP_TARGET | sed -n '/.*@/p'`
if [ -z "$SSH_TARGET" ]; then
return 1
fi
return 0
}
# ipv6 host address may takes a long time to be ready.
# Instead of checking against ipv6 address, we just check the network reachable
# by the return val of 'ssh'
check_and_wait_network_ready()
{
local start_time=$(date +%s)
local warn_once=1
local cur
local diff
local retval
local errmsg
while true; do
errmsg=$(ssh -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH 2>&1)
retval=$?
# ssh exits with the exit status of the remote command or with 255 if an error occurred
if [ $retval -eq 0 ]; then
return 0
elif [ $retval -ne 255 ]; then
derror "Could not create $DUMP_TARGET:$SAVE_PATH, you should check the privilege on server side"
return 1
fi
# if server removes the authorized_keys or, no /root/.ssh/kdump_id_rsa
ddebug "$errmsg"
echo $errmsg | grep -q "Permission denied\|No such file or directory\|Host key verification failed" &> /dev/null
if [ $? -eq 0 ]; then
derror "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"kdumpctl propagate\""
return 1
fi
if [ $warn_once -eq 1 ]; then
dwarn "Network dump target is not usable, waiting for it to be ready..."
warn_once=0
fi
cur=$(date +%s)
let "diff = $cur - $start_time"
# 60s time out
if [ $diff -gt 180 ]; then
break;
fi
sleep 1
done
dinfo "Could not create $DUMP_TARGET:$SAVE_PATH, ipaddr is not ready yet. You should check network connection"
return 1
}
check_ssh_target()
{
check_and_wait_network_ready
if [ $? -ne 0 ]; then
return 1
fi
return 0
}
propagate_ssh_key()
{
check_ssh_config
if [ $? -ne 0 ]; then
derror "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate"
exit 1
fi
local KEYFILE=$SSH_KEY_LOCATION
local errmsg="Failed to propagate ssh key"
#Check to see if we already created key, if not, create it.
if [ -f $KEYFILE ]; then
dinfo "Using existing keys..."
else
dinfo "Generating new ssh keys... "
/usr/bin/ssh-keygen -t rsa -f $KEYFILE -N "" 2>&1 > /dev/null
dinfo "done."
fi
#now find the target ssh user and server to contact.
SSH_USER=`echo $DUMP_TARGET | cut -d\ -f2 | cut -d@ -f1`
SSH_SERVER=`echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/'`
#now send the found key to the found server
ssh-copy-id -i $KEYFILE $SSH_USER@$SSH_SERVER
RET=$?
if [ $RET == 0 ]; then
dinfo "$KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER"
return 0
else
derror "$errmsg, $KEYFILE failed in transfer to $SSH_SERVER"
exit 1
fi
}
show_reserved_mem()
{
local mem=$(cat /sys/kernel/kexec_crash_size)
local mem_mb=$(expr $mem / 1024 / 1024)
dinfo "Reserved "$mem_mb"MB memory for crash kernel"
}
check_current_fadump_status()
{
# Check if firmware-assisted dump has been registered.
rc=`cat $FADUMP_REGISTER_SYS_NODE`
[ $rc -eq 1 ] && return 0
return 1
}
check_current_status()
{
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
check_current_fadump_status
else
check_current_kdump_status
fi
return $?
}
save_raw()
{
local kdump_dir
local raw_target
raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
[ -z "$raw_target" ] && return 0
[ -b "$raw_target" ] || {
derror "raw partition $raw_target not found"
return 1
}
kdump_dir=`grep ^path $KDUMP_CONFIG_FILE | cut -d' ' -f2-`
if [ -z "${kdump_dir}" ]; then
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
else
coredir="${kdump_dir}/`date +"%Y-%m-%d-%H:%M"`"
fi
mkdir -p "$coredir"
[ -d "$coredir" ] || {
derror "failed to create $coredir"
return 1
}
if makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
# dump found
dinfo "Dump saved to $coredir/vmcore"
# wipe makedumpfile header
dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
else
rm -rf "$coredir"
fi
return 0
}
local_fs_dump_target()
{
local _target
_target=$(egrep "^ext[234]|^xfs|^btrfs|^minix" /etc/kdump.conf)
if [ $? -eq 0 ]; then
echo $_target|awk '{print $2}'
fi
}
path_to_be_relabeled()
{
local _path _target _mnt="/" _rmnt
if is_user_configured_dump_target; then
if is_mount_in_dracut_args; then
return;
fi
_target=$(local_fs_dump_target)
if [[ -n "$_target" ]]; then
_mnt=$(get_mntpoint_from_target $_target)
if ! is_mounted "$_mnt"; then
return
fi
else
return
fi
fi
_path=$(get_save_path)
# if $_path is masked by other mount, we will not relabel it.
_rmnt=$(df $_mnt/$_path 2>/dev/null | tail -1 | awk '{ print $NF }')
if [ "$_rmnt" == "$_mnt" ]; then
echo $_mnt/$_path
fi
}
selinux_relabel()
{
local _path _i _attr
_path=$(path_to_be_relabeled)
if [ -z "$_path" ] || ! [ -d "$_path" ] ; then
return
fi
while IFS= read -r -d '' _i; do
_attr=$(getfattr -m "security.selinux" "$_i" 2>/dev/null)
if [ -z "$_attr" ]; then
restorecon "$_i";
fi
done < <(find "$_path" -print0)
}
check_fence_kdump_config()
{
local hostname=`hostname`
local ipaddrs=`hostname -I`
local nodes=$(get_option_value "fence_kdump_nodes")
for node in $nodes; do
if [ "$node" = "$hostname" ]; then
derror "Option fence_kdump_nodes cannot contain $hostname"
return 1
fi
# node can be ipaddr
echo "$ipaddrs " | grep "$node " > /dev/null
if [ $? -eq 0 ]; then
derror "Option fence_kdump_nodes cannot contain $node"
return 1
fi
done
return 0
}
check_dump_feasibility()
{
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
return 0
fi
check_kdump_feasibility
return $?
}
start_fadump()
{
echo 1 > $FADUMP_REGISTER_SYS_NODE
if ! check_current_fadump_status; then
derror "fadump: failed to register"
return 1
fi
dinfo "fadump: registered successfully"
return 0
}
start_dump()
{
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
start_fadump
else
load_kdump
fi
return $?
}
check_failure_action_config()
{
local default_option
local failure_action
local option="failure_action"
default_option=$(awk '$1 ~ /^default$/ {print $2;}' $KDUMP_CONFIG_FILE)
failure_action=$(awk '$1 ~ /^failure_action$/ {print $2;}' $KDUMP_CONFIG_FILE)
if [ -z "$failure_action" -a -z "$default_option" ]; then
return 0
elif [ -n "$failure_action" -a -n "$default_option" ]; then
derror "Cannot specify 'failure_action' and 'default' option together"
return 1
fi
if [ -n "$default_option" ]; then
option="default"
failure_action="$default_option"
fi
case "$failure_action" in
reboot|halt|poweroff|shell|dump_to_rootfs)
return 0
;;
*)
dinfo $"Usage kdump.conf: $option {reboot|halt|poweroff|shell|dump_to_rootfs}"
return 1
esac
}
check_final_action_config()
{
local final_action
final_action=$(awk '$1 ~ /^final_action$/ {print $2;}' $KDUMP_CONFIG_FILE)
if [ -z "$final_action" ]; then
return 0
else
case "$final_action" in
reboot|halt|poweroff)
return 0
;;
*)
dinfo $"Usage kdump.conf: final_action {reboot|halt|poweroff}"
return 1
esac
fi
}
start()
{
check_dump_feasibility
if [ $? -ne 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
check_config
if [ $? -ne 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
if sestatus 2>/dev/null | grep -q "SELinux status.*enabled"; then
selinux_relabel
fi
save_raw
if [ $? -ne 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
check_current_status
if [ $? == 0 ]; then
dwarn "Kdump already running: [WARNING]"
return 0
fi
if check_ssh_config; then
if ! check_ssh_target; then
derror "Starting kdump: [FAILED]"
return 1
fi
fi
check_rebuild
if [ $? != 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
start_dump
if [ $? != 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
dinfo "Starting kdump: [OK]"
}
reload()
{
check_current_status
if [ $? -ne 0 ]; then
dwarn "Kdump was not running: [WARNING]"
fi
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
reload_fadump
return $?
else
stop_kdump
fi
if [ $? -ne 0 ]; then
derror "Stopping kdump: [FAILED]"
return 1
fi
dinfo "Stopping kdump: [OK]"
setup_initrd
if [ $? -ne 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
start_dump
if [ $? -ne 0 ]; then
derror "Starting kdump: [FAILED]"
return 1
fi
dinfo "Starting kdump: [OK]"
}
stop_fadump()
{
echo 0 > $FADUMP_REGISTER_SYS_NODE
if check_current_fadump_status; then
derror "fadump: failed to unregister"
return 1
fi
dinfo "fadump: unregistered successfully"
return 0
}
stop_kdump()
{
if is_secure_boot_enforced; then
$KEXEC -s -p -u
else
$KEXEC -p -u
fi
if [ $? != 0 ]; then
derror "kexec: failed to unload kdump kernel"
return 1
fi
dinfo "kexec: unloaded kdump kernel"
return 0
}
reload_fadump()
{
echo 1 > $FADUMP_REGISTER_SYS_NODE
if [ $? == 0 ]; then
dinfo "fadump: re-registered successfully"
return 0
else
# FADump could fail on older kernel where re-register
# support is not enabled. Try stop/start from userspace
# to handle such scenario.
stop_fadump
if [ $? == 0 ]; then
start_fadump
return $?
fi
fi
return 1
}
stop()
{
if [ $DEFAULT_DUMP_MODE == "fadump" ]; then
stop_fadump
else
stop_kdump
fi
if [ $? != 0 ]; then
derror "Stopping kdump: [FAILED]"
return 1
fi
dinfo "Stopping kdump: [OK]"
return 0
}
rebuild() {
check_config
if [ $? -ne 0 ]; then
return 1
fi
if check_ssh_config; then
if ! check_ssh_target; then
return 1
fi
fi
setup_initrd
if [ $? -ne 0 ]; then
return 1
fi
dinfo "Rebuilding $TARGET_INITRD"
rebuild_initrd
return $?
}
do_estimate() {
local kdump_mods
local -A large_mods
local baseline
local kernel_size mod_size initrd_size baseline_size runtime_size reserved_size estimated_size recommended_size
local size_mb=$(( 1024 * 1024 ))
setup_initrd
if [ ! -f "$TARGET_INITRD" ]; then
derror "kdumpctl estimate: kdump initramfs is not built yet."
exit 1
fi
kdump_mods="$(lsinitrd "$TARGET_INITRD" -f /usr/lib/dracut/loaded-kernel-modules.txt | tr '\n' ' ')"
baseline=$(kdump_get_arch_recommend_size)
if [[ "${baseline: -1}" == "M" ]]; then
baseline=${baseline%M}
elif [[ "${baseline: -1}" == "G" ]]; then
baseline=$(( ${baseline%G} * 1024 ))
elif [[ "${baseline: -1}" == "T" ]]; then
baseline=$(( ${baseline%Y} * 1048576 ))
fi
# The default value when using crashkernel=auto
baseline_size=$((baseline * size_mb))
# Current reserved crashkernel size
reserved_size=$(cat /sys/kernel/kexec_crash_size)
# A pre-estimated value for userspace usage and kernel
# runtime allocation, 64M should good for most cases
runtime_size=$((64 * size_mb))
# Kernel image size
kernel_size=$(get_kernel_size "$KDUMP_KERNEL")
# Kdump initramfs size
initrd_size=$(du -b "$TARGET_INITRD" | awk '{print $1}')
# Kernel modules static size after loaded
mod_size=0
while read -r _name _size _; do
if [[ ! " $kdump_mods " == *" $_name "* ]]; then
continue
fi
mod_size=$((mod_size + _size))
# Mark module with static size larger than 2M as large module
if [[ $((_size / size_mb)) -ge 1 ]]; then
large_mods[$_name]=$_size
fi
done <<< "$(< /proc/modules)"
# Extra memory usage required for LUKS2 decryption
crypt_size=0
for _dev in $(get_all_kdump_crypt_dev); do
_crypt_info=$(cryptsetup luksDump "/dev/block/$_dev")
[[ $(echo "$_crypt_info" | sed -n "s/^Version:\s*\(.*\)/\1/p" ) == "2" ]] || continue
for _mem in $(echo "$_crypt_info" | sed -n "s/\sMemory:\s*\(.*\)/\1/p" | sort -n ); do
crypt_size=$((crypt_size + _mem * 1024))
break
done
done
[[ $crypt_size -ne 0 ]] && echo -e "Encrypted kdump target requires extra memory, assuming using the keyslot with minimun memory requirement\n"
estimated_size=$((kernel_size + mod_size + initrd_size + runtime_size + crypt_size))
if [[ $baseline_size -gt $estimated_size ]]; then
recommended_size=$baseline_size
else
recommended_size=$estimated_size
fi
echo "Reserved crashkernel: $((reserved_size / size_mb))M"
echo "Recommended crashkernel: $((recommended_size / size_mb))M"
echo
echo "Kernel image size: $((kernel_size / size_mb))M"
echo "Kernel modules size: $((mod_size / size_mb))M"
echo "Initramfs size: $((initrd_size / size_mb))M"
echo "Runtime reservation: $((runtime_size / size_mb))M"
[[ $crypt_size -ne 0 ]] && \
echo "LUKS required size: $((crypt_size / size_mb))M"
echo -n "Large modules:"
if [[ "${#large_mods[@]}" -eq 0 ]]; then
echo " <none>"
else
echo ""
for _mod in "${!large_mods[@]}"; do
echo " $_mod: ${large_mods[$_mod]}"
done
fi
if [[ $reserved_size -lt $recommended_size ]]; then
echo "WARNING: Current crashkernel size is lower than recommended size $((recommended_size / size_mb))M."
fi
}
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
derror "Error: No kdump config file found!"
exit 1
fi
main ()
{
# Determine if the dump mode is kdump or fadump
determine_dump_mode
case "$1" in
start)
if [ -s /proc/vmcore ]; then
save_core
reboot
else
start
fi
;;
stop)
stop
;;
status)
EXIT_CODE=0
check_current_status
case "$?" in
0)
dinfo "Kdump is operational"
EXIT_CODE=0
;;
1)
dinfo "Kdump is not operational"
EXIT_CODE=3
;;
esac
exit $EXIT_CODE
;;
reload)
reload
;;
restart)
stop
start
;;
rebuild)
rebuild
;;
condrestart)
;;
propagate)
propagate_ssh_key
;;
showmem)
show_reserved_mem
;;
estimate)
do_estimate
;;
*)
dinfo $"Usage: $0 {estimate|start|stop|status|restart|reload|rebuild|propagate|showmem}"
exit 1
esac
}
# Other kdumpctl instances will block in queue, until this one exits
single_instance_lock
# To avoid fd 9 leaking, we invoke a subshell, close fd 9 and call main.
# So that fd isn't leaking when main is invoking a subshell.
(exec 9<&-; main $1)
exit $?