kexec-tools/kdumpctl

662 lines
15 KiB
Plaintext
Raw Normal View History

2011-07-06 19:25:34 +00:00
#!/bin/bash
# kdumpctl - control script for the kdump service.
#
# bash is required: this script uses the "function" keyword, "local",
# "[[ ]]" and "==" inside tests, none of which are guaranteed by /bin/sh.

# Built-in defaults.  /etc/sysconfig/kdump is sourced further down and may
# override any of them.
KEXEC=/sbin/kexec
KDUMP_KERNELVER=""
KDUMP_COMMANDLINE=""
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
MKDUMPRD="/sbin/mkdumprd -f"
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DUMP_TARGET=""

# Shared helpers.  Functions used below but not defined in this file
# (e.g. strip_comments, is_fence_kdump) are presumably provided here.
. /lib/kdump/kdump-lib.sh

# Arguments always passed to kexec when loading the kdump kernel.
standard_kexec_args="-p"

# Site-specific overrides.
if [ -f /etc/sysconfig/kdump ]; then
	. /etc/sysconfig/kdump
fi
# Open /var/lock/kdump on file descriptor 9 and take a flock on it, waiting
# until the lock can be obtained.
single_instance_lock()
{
	local lock_file=/var/lock/kdump

	exec 9>"$lock_file"
	flock 9
}
# remove_cmdline_param <kernel cmdline> <param1> [<param2>] ... [<paramN>]
# Remove a list of kernel parameters from a given kernel cmdline and print
# the result on stdout.
# For each "arg" in the removing params list, the whole tokens "arg" and
# "arg=xxx" are removed if they exist.  Tokens that merely contain "arg"
# (e.g. "foo.arg=1" or "argz") are left alone.  The remaining tokens are
# printed separated by single spaces.
function remove_cmdline_param()
{
	local cmdline=$1
	local IFS=$' \t\n'
	local -a words=() kept=()
	local word param drop
	shift

	# Split on whitespace without pathname expansion, so a "*" on the
	# command line is never expanded against the current directory.
	# read returns non-zero at end of input with -d ''; the array is
	# still filled.
	read -r -d '' -a words <<< "$cmdline"

	for word in "${words[@]}"; do
		drop=0
		for param in "$@"; do
			if [[ $word == "$param" || $word == "$param="* ]]; then
				drop=1
				break
			fi
		done
		[ "$drop" -eq 1 ] || kept+=("$word")
	done

	printf '%s\n' "${kept[*]}"
}
# Copy /proc/vmcore into a timestamped directory under /var/crash.
# The copy is first written as "vmcore-incomplete" and renamed to "vmcore"
# only when cp succeeded, so a partial dump is never mistaken for a full one.
# Globals: coredir (written)
function save_core()
{
	coredir="/var/crash/$(date +"%Y-%m-%d-%H:%M")"

	mkdir -p "$coredir"
	if cp --sparse=always /proc/vmcore "$coredir/vmcore-incomplete"; then
		mv "$coredir/vmcore-incomplete" "$coredir/vmcore"
		echo "saved a vmcore to $coredir"
	else
		echo "failed to save a vmcore to $coredir" >&2
	fi

	# pass the dmesg to Abrt tool if exists, in order
	# to collect the kernel oops message.
	# https://fedorahosted.org/abrt/
	if [ -x /usr/bin/dumpoops ]; then
		makedumpfile --dump-dmesg "$coredir/vmcore" "$coredir/dmesg" >/dev/null 2>&1
		if dumpoops -d "$coredir/dmesg" >/dev/null 2>&1; then
			echo "kernel oops has been collected by abrt tool"
		fi
	fi
}
# Run $MKDUMPRD for $kdump_initrd / $kdump_kver.
# Returns 1 (with a message on stderr) when the command fails.
function rebuild_initrd()
{
	# $MKDUMPRD is left unquoted on purpose: it carries the command
	# together with its options.
	if ! $MKDUMPRD $kdump_initrd $kdump_kver; then
		echo "mkdumprd: failed to make kdump initrd" >&2
		return 1
	fi
}
# check_exist <files>
# $1: whitespace-separated list of paths.
# Returns 1 and reports the first entry that is not a regular file.
function check_exist()
{
	local candidate

	for candidate in $1; do
		[ -f "$candidate" ] && continue
		printf '%s\n' "Error: $candidate not found."
		return 1
	done
}
# check_executable <files>
# $1: whitespace-separated list of paths.
# Returns 1 and reports the first entry that fails the -x test.
function check_executable()
{
	local candidate

	for candidate in $1; do
		[ -x "$candidate" ] && continue
		printf '%s\n' "Error: $candidate is not executable."
		return 1
	done
}
# Validate $KDUMP_CONFIG_FILE.
# Fails (return 1, message on stdout) when:
#   - more than one dump target line is present,
#   - a known option has an empty value,
#   - a deprecated or unknown option is used.
# Returns 0 otherwise.
function check_config()
{
	local nr config_opt config_val

	nr=$(awk 'BEGIN{cnt=0} /^raw|^ssh[[:blank:]]|^nfs|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' "$KDUMP_CONFIG_FILE")
	# Default to 0 so an unreadable file does not turn into a test error.
	if [ "${nr:-0}" -gt 1 ]; then
		echo "More than one dump targets specified."
		return 1
	fi

	# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
	while read config_opt config_val || [ -n "$config_opt" ]; do
		# remove inline comments after the end of a directive.
		# strip_comments is not defined in this file; the value is passed
		# as separate words, as the helper presumably expects.
		config_val=$(strip_comments $config_val)
		case "$config_opt" in
		\#* | "")
			;;
		raw|ext2|ext3|ext4|minix|btrfs|xfs|nfs|ssh|sshkey|path|core_collector|kdump_post|kdump_pre|extra_bins|extra_modules|default|force_rebuild|dracut_args)
			if [ -z "$config_val" ]; then
				echo "Invalid kdump config value for option $config_opt."
				return 1
			fi
			;;
		net|options|link_delay|disk_timeout|debug_mem_level|blacklist)
			echo "Deprecated kdump config option: $config_opt. Refer to kdump.conf manpage for alternatives."
			return 1
			;;
		*)
			echo "Invalid kdump config option $config_opt"
			return 1
			;;
		esac
	done < "$KDUMP_CONFIG_FILE"

	return 0
}
# check_fence_kdump <image timestamp>
# return 0 if fence_kdump is configured and kdump initrd needs to be rebuilt,
# i.e. the cluster CIB was last written after <image timestamp> (seconds
# since the epoch).  Returns 1 otherwise, including when the CIB timestamp
# cannot be determined.
function check_fence_kdump()
{
	local image_time=$1
	local cib_time

	is_fence_kdump || return 1

	cib_time=$(pcs cluster cib | xmllint --xpath 'string(/cib/@cib-last-written)' - | \
		xargs -0 date +%s --date)

	# Two separate tests: with "-o" and an empty $cib_time the single
	# test became a syntax error and wrongly fell through to "rebuild".
	if [ -z "$cib_time" ] || [ "$cib_time" -le "$image_time" ]; then
		return 1
	fi

	return 0
}
# Decide whether the kdump initrd must be (re)built and build it if so.
# A rebuild happens when the image is missing, when force_rebuild is 1 or
# extra_modules is set in $KDUMP_CONFIG_FILE, or when any dependent file
# (or the cluster CIB, see check_fence_kdump) is newer than the image.
# Globals written: kdump_kver, kdump_kernel, kdump_initrd, image_time,
#                  EXTRA_BINS, CHECK_FILES, files
# Globals read:    KDUMP_KERNELVER, KDUMP_CONFIG_FILE, FENCE_KDUMP_CONFIG and
#                  KDUMP_BOOTDIR / KDUMP_IMG / KDUMP_IMG_EXT (none of the
#                  last four are set in this file; presumably sourced).
# Returns 0 when the image is up to date or was rebuilt, 1 on any error.
function check_rebuild()
{
	local extra_modules modified_files=""
	local _force_rebuild force_rebuild="0"
	local file time_stamp

	if [ -z "$KDUMP_KERNELVER" ]; then
		kdump_kver=$(uname -r)
	else
		kdump_kver=$KDUMP_KERNELVER
	fi

	kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
	kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"

	if _force_rebuild=$(grep ^force_rebuild "$KDUMP_CONFIG_FILE" 2>/dev/null); then
		# unquoted echo collapses tabs/multiple blanks before cut
		force_rebuild=$(echo $_force_rebuild | cut -d' ' -f2)
		if [ "$force_rebuild" != "0" ] && [ "$force_rebuild" != "1" ]; then
			echo "Error: force_rebuild value is invalid"
			return 1
		fi
	fi

	#will rebuild every time if extra_modules are specified
	extra_modules=$(grep ^extra_modules "$KDUMP_CONFIG_FILE")
	[ -n "$extra_modules" ] && force_rebuild="1"

	#check to see if dependent files has been modified
	#since last build of the image file
	if [ -f "$kdump_initrd" ]; then
		image_time=$(stat -c "%Y" "$kdump_initrd" 2>/dev/null)
		# a failed stat is treated like a missing image
		image_time=${image_time:-0}
	else
		image_time=0
	fi

	#also rebuild when cluster conf is changed and fence kdump is enabled.
	check_fence_kdump "$image_time" && modified_files="cluster-cib"

	EXTRA_BINS=$(grep ^kdump_post "$KDUMP_CONFIG_FILE" | cut -d' ' -f2)
	CHECK_FILES=$(grep ^kdump_pre "$KDUMP_CONFIG_FILE" | cut -d' ' -f2)
	EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
	CHECK_FILES=$(grep ^extra_bins "$KDUMP_CONFIG_FILE" | cut -d' ' -f2-)
	EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
	files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"
	if [ -f "$FENCE_KDUMP_CONFIG" ]; then
		files="$files $FENCE_KDUMP_CONFIG"
	fi

	check_exist "$files" || return 1
	check_executable "$EXTRA_BINS" || return 1

	# $files is a whitespace-separated list; splitting is intended.
	for file in $files; do
		time_stamp=$(stat -c "%Y" "$file")
		if [ "$time_stamp" -gt "$image_time" ]; then
			modified_files="$modified_files $file"
		fi
	done

	if [ "$image_time" -eq 0 ]; then
		echo "No kdump initial ramdisk found."
	elif [ "$force_rebuild" != "0" ]; then
		echo "Force rebuild $kdump_initrd"
	elif [ -n "$modified_files" ]; then
		echo "Detected change(s) the following file(s):"
		echo -n "  "; echo "$modified_files" | sed 's/\s/\n  /g'
	else
		return 0
	fi

	echo "Rebuilding $kdump_initrd"
	rebuild_initrd
	return $?
}
# This function checks iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we don't.
#
# The decision is based on the end address of the last range listed in
# /proc/iomem: anything above 0xffffffff needs more than 8 hex digits once
# leading zeros are dropped.  Done with shell string handling only, so it
# does not depend on gawk's strtonum extension.
function need_64bit_headers()
{
	local last_line range end

	last_line=$(tail -n 1 /proc/iomem 2>/dev/null)
	# first field looks like "<start>-<end>"
	read -r range _ <<< "$last_line"
	[[ $range == *-* ]] || return 0

	end=${range#*-}
	[[ $end =~ ^[0-9a-fA-F]+$ ]] || return 0

	# strip leading zeros, then compare by digit count
	end=${end#"${end%%[!0]*}"}
	if [ "${#end}" -gt 8 ]; then
		return 1
	fi
	return 0
}
# Load the kdump kernel specified in /etc/sysconfig/kdump
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
#
# Uses $kdump_kernel and $kdump_initrd, which check_rebuild sets.
# Globals written: MEM_RESERVED, ARCH, FOUND_ELF_ARGS, KEXEC_ARGS,
#                  KDUMP_COMMANDLINE
# Returns 0 when kexec loaded the kernel, 1 otherwise.
function load_kdump()
{
	MEM_RESERVED=$(cat /sys/kernel/kexec_crash_size)
	# An unreadable size is treated like "nothing reserved".
	if [ "${MEM_RESERVED:-0}" -eq 0 ]; then
		echo "No memory reserved for crash kernel." >&2
		return 1
	fi

	ARCH=$(uname -m)
	if [ "$ARCH" == "i686" ] || [ "$ARCH" == "i386" ]; then
		need_64bit_headers
		if [ $? == 1 ]; then
			# More than 4GB: elf64 headers are needed unless the
			# admin explicitly asked for elf32.
			FOUND_ELF_ARGS=$(echo $KEXEC_ARGS | grep elf32-core-headers)
			if [ -n "$FOUND_ELF_ARGS" ]; then
				echo "Warning: elf32-core-headers overrides correct elf64 setting"
			else
				KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
			fi
		else
			FOUND_ELF_ARGS=$(echo $KEXEC_ARGS | grep elf64-core-headers)
			if [ -z "$FOUND_ELF_ARGS" ]; then
				KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
			fi
		fi
	fi

	if [ -z "$KDUMP_COMMANDLINE" ]; then
		KDUMP_COMMANDLINE=$(cat /proc/cmdline)
	fi

	KDUMP_COMMANDLINE=$(remove_cmdline_param "$KDUMP_COMMANDLINE" crashkernel hugepages hugepagesz)
	KDUMP_COMMANDLINE="${KDUMP_COMMANDLINE} ${KDUMP_COMMANDLINE_APPEND}"

	# $KEXEC_ARGS and $standard_kexec_args hold option lists and must be
	# word-split; the paths and the command line are passed as single words.
	if $KEXEC $KEXEC_ARGS $standard_kexec_args \
		--command-line="$KDUMP_COMMANDLINE" \
		--initrd="$kdump_initrd" "$kdump_kernel" 2>/dev/null; then
		echo "kexec: loaded kdump kernel"
		return 0
	else
		echo "kexec: failed to load kdump kernel" >&2
		return 1
	fi
}
# Read the ssh related settings from $KDUMP_CONFIG_FILE.
#   sshkey -> SSH_KEY_LOCATION (only when the file exists, canonicalized)
#   path   -> SAVE_PATH
#   ssh    -> DUMP_TARGET
# Returns 0 when DUMP_TARGET contains an "@" (user@host form), else 1.
function check_ssh_config()
{
	while read config_opt config_val; do
		# drop any trailing inline comment from the value
		config_val=$(strip_comments $config_val)

		if [ "$config_opt" = "sshkey" ]; then
			if [ -f "$config_val" ]; then
				# store the canonical form of the path
				SSH_KEY_LOCATION=$(/usr/bin/readlink -m $config_val)
			else
				echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
			fi
		elif [ "$config_opt" = "path" ]; then
			SAVE_PATH=$config_val
		elif [ "$config_opt" = "ssh" ]; then
			DUMP_TARGET=$config_val
		fi
	done < $KDUMP_CONFIG_FILE

	# an ssh dump target must look like user@host
	case "$DUMP_TARGET" in
	*@*)
		return 0
		;;
	esac
	return 1
}
# Try to create $SAVE_PATH on $DUMP_TARGET over ssh (batch mode, using
# $SSH_KEY_LOCATION).  Returns 0 on success; otherwise prints a hint on
# stderr and returns 1.
function check_ssh_target()
{
	if ssh -q -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH; then
		return 0
	fi

	echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"kdumpctl propagate\"" >&2
	return 1
}
# Create the kdump ssh key if needed and install it on the ssh dump target
# with ssh-copy-id.
# Exits the script with status 1 when no ssh target is configured or when
# the key transfer fails; returns 0 on success.
# Globals written: se_enforce, SSH_USER, SSH_SERVER, RET
function propagate_ssh_key()
{
	local KEYFILE
	local errmsg="Failed to propagate ssh key"

	if ! check_ssh_config; then
		echo "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate" >&2
		exit 1
	fi

	#Check if selinux is on... must flip to permissive mode
	#for the moment to create key, then flip back...
	se_enforce=$(/usr/sbin/sestatus | grep -c "^Current mode.*enforcing")
	if [ "$se_enforce" -ge 1 ]; then
		# NOTE(review): with this redirection order only stdout is
		# discarded; stderr goes to the caller's stdout.  Kept as is.
		/usr/sbin/setenforce 0 2>&1 > /dev/null
	fi

	KEYFILE=$SSH_KEY_LOCATION

	#Check to see if we already created key, if not, create it.
	if [ -f "$KEYFILE" ]; then
		echo "Using existing keys..."
	else
		echo -n "Generating new ssh keys... "
		/usr/bin/ssh-keygen -t rsa -f "$KEYFILE" -N "" 2>&1 > /dev/null
		echo "done."
	fi

	#If necessary, flip selinux back to enforcing
	if [ "$se_enforce" -ge 1 ]; then
		/usr/sbin/setenforce 1 2>&1 > /dev/null
	fi

	#now find the target ssh user and server to contact.
	SSH_USER=$(echo $DUMP_TARGET | cut -d' ' -f2 | cut -d@ -f1)
	SSH_SERVER=$(echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/')

	#now send the found key to the found server
	ssh-copy-id -i "$KEYFILE" "$SSH_USER@$SSH_SERVER"
	# capture the status immediately; nothing may run in between
	RET=$?
	if [ "$RET" == 0 ]; then
		echo "$KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER"
		return 0
	else
		echo "$errmsg, $KEYFILE failed in transfer to $SSH_SERVER" >&2
		exit 1
	fi
}
# Report whether a crash kernel is currently loaded.
# Returns 0 when /sys/kernel/kexec_crash_loaded reads "1", 1 otherwise
# (including when the file cannot be read).
function check_current_kdump_status()
{
	local loaded

	loaded=$(cat /sys/kernel/kexec_crash_loaded 2>/dev/null)
	if [ "$loaded" = "1" ]; then
		return 0
	fi
	return 1
}
# Recover a dump that was written to the "raw" target of $KDUMP_CONFIG_FILE.
# Nothing to do (return 0) when no raw target is configured.  Otherwise the
# raw device is fed to "makedumpfile -R" to produce <path>/<timestamp>/vmcore
# (<path> defaults to /var/crash); on success the first block of the device
# is zeroed, on failure the new directory is removed again.
# Returns 1 when the raw device or the target directory is unavailable.
# Globals: coredir (written)
function save_raw()
{
	local kdump_dir
	local raw_target

	raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
	if [ -z "$raw_target" ]; then
		return 0
	fi
	if [ ! -b "$raw_target" ]; then
		echo "raw partition $raw_target not found"
		return 1
	fi

	kdump_dir=$(grep ^path $KDUMP_CONFIG_FILE | cut -d' ' -f2-)
	coredir="${kdump_dir:-/var/crash}/$(date +"%Y-%m-%d-%H:%M")"

	mkdir -p "$coredir"
	if [ ! -d "$coredir" ]; then
		echo "failed to create $coredir"
		return 1
	fi

	if ! makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
		# no usable dump on the device
		rm -rf "$coredir"
		return 0
	fi

	# dump found
	echo "Dump saved to $coredir/vmcore"
	# wipe makedumpfile header
	dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
	return 0
}
# Print the dump directory configured with "path" in the kdump config file
# ($KDUMP_CONFIG_FILE, falling back to /etc/kdump.conf), or /var/crash when
# no path is configured.  When "path" appears several times the last entry
# wins, matching how check_ssh_config fills SAVE_PATH.
get_save_path() {
	local _conf=${KDUMP_CONFIG_FILE:-/etc/kdump.conf}
	local _save_path

	_save_path=$(awk '$1 == "path" { p = $2 } END { if (p != "") print p }' "$_conf" 2>/dev/null)
	if [ -z "$_save_path" ]; then
		_save_path="/var/crash"
	fi

	printf '%s\n' "$_save_path"
}
# Return 0 when the kdump config file ($KDUMP_CONFIG_FILE, falling back to
# /etc/kdump.conf) contains a dump target line (ext2/3/4, xfs, btrfs, minix,
# raw, ssh or nfs), 1 otherwise.
# The option name must be followed by a blank so that e.g. "sshkey" is not
# mistaken for an "ssh" target.
is_dump_target_configured() {
	local _conf=${KDUMP_CONFIG_FILE:-/etc/kdump.conf}
	local _target

	_target=$(grep -E '^(ext[234]|xfs|btrfs|minix|raw|ssh|nfs)[[:blank:]]' "$_conf" 2>/dev/null)

	[ -n "$_target" ]
}
# Print the device/label of the first local filesystem dump target
# (ext2/3/4, xfs, btrfs or minix line) found in the kdump config file
# ($KDUMP_CONFIG_FILE, falling back to /etc/kdump.conf).
# Prints nothing when there is no such target.  Always returns 0.
local_fs_dump_target()
{
	local _conf=${KDUMP_CONFIG_FILE:-/etc/kdump.conf}
	local _target

	if _target=$(grep -E '^(ext[234]|xfs|btrfs|minix)' "$_conf" 2>/dev/null); then
		# unquoted on purpose: joins all matches so awk sees the first one
		echo $_target | awk '{print $2}'
	fi
	return 0
}
path_to_be_relabeled() {
local _path _target _mnt="/" _rmnt
if is_dump_target_configured; then
_target=$(local_fs_dump_target)
if [[ -n "$_target" ]]; then
_mnt=$(findmnt -k -f -n -r -o TARGET $_target)
if [ -z "$_mnt" ]; then
return
fi
else
return
fi
fi
_path=$(get_save_path)
# if $_path is masked by other mount, we will not relabel it.
_rmnt=$(df $_mnt/$_path 2>/dev/null | tail -1 | awk '{ print $NF }')
if [ "$_rmnt" == "$_mnt" ]; then
echo $_mnt/$_path
fi
}
# Run restorecon on every entry below the directory reported by
# path_to_be_relabeled for which getfattr shows no security.selinux
# attribute.  Does nothing when there is no such directory.
selinux_relabel()
{
	local _path _i _attr

	_path=$(path_to_be_relabeled)
	if [ -z "$_path" ] || ! [ -d "$_path" ] ; then
		return
	fi

	# NUL-delimited so that names containing blanks or newlines are
	# handled as a single entry.
	while IFS= read -r -d '' _i; do
		_attr=$(getfattr -m "security.selinux" "$_i" 2>/dev/null)
		if [ -z "$_attr" ]; then
			restorecon "$_i"
		fi
	done < <(find "$_path" -print0)
}
# Check if secure boot is being enforced.
#
# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# Assume efivars is mounted at /sys/firmware/efi/efivars.
#
# Returns 0 when secure boot is enforced, 1 otherwise.
function is_secure_boot_enforced()
{
	local secure_boot_file setup_mode_file
	local secure_boot_byte setup_mode_byte

	# The -name patterns are quoted so the shell cannot expand them
	# against files in the current directory before find sees them.
	secure_boot_file=$(find /sys/firmware/efi/efivars -name 'SecureBoot-*' 2>/dev/null)
	setup_mode_file=$(find /sys/firmware/efi/efivars -name 'SetupMode-*' 2>/dev/null)

	if [ -f "$secure_boot_file" ] && [ -f "$setup_mode_file" ]; then
		# one decimal number per byte, blank separated; field 5 is the value
		secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' "$secure_boot_file"|cut -d' ' -f 5)
		setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' "$setup_mode_file"|cut -d' ' -f 5)

		if [ "$secure_boot_byte" = "1" ] && [ "$setup_mode_byte" = "0" ]; then
			return 0
		fi
	fi
	return 1
}
# Check the preconditions for loading a kdump kernel.
# Returns 1 (with a message on stdout) when secure boot is enforced or when
# /sys/kernel/kexec_crash_loaded does not exist; 0 otherwise.
function check_kdump_feasibility()
{
	is_secure_boot_enforced && {
		echo "Secure Boot is Enabled. Kdump service can't be started. Disable Secure Boot and retry"
		return 1
	}

	[ -e /sys/kernel/kexec_crash_loaded ] || {
		echo "Kdump is not supported on this kernel"
		return 1
	}
}
# Start the kdump service: validate the configuration, fix SELinux labels,
# recover a pending raw dump, check feasibility and the ssh target, rebuild
# the initrd when needed and finally load the crash kernel.
# Returns 0 on success or when kdump is already loaded, 1 on any failure.
function start()
{
	local failed="Starting kdump: [FAILED]"

	if ! check_config; then
		echo "$failed"
		return 1
	fi

	if sestatus 2>/dev/null | grep -q "SELinux status.*enabled"; then
		selinux_relabel
	fi

	if ! save_raw; then
		echo "$failed"
		return 1
	fi

	if ! check_kdump_feasibility; then
		echo "$failed"
		return 1
	fi

	if check_current_kdump_status; then
		echo "Kdump already running: [WARNING]"
		return 0
	fi

	# only probe the remote side when an ssh target is configured
	if check_ssh_config; then
		if ! check_ssh_target; then
			echo "$failed"
			return 1
		fi
	fi

	if ! check_rebuild; then
		echo "$failed"
		return 1
	fi

	if ! load_kdump; then
		echo "$failed"
		return 1
	fi

	echo "Starting kdump: [OK]"
}
# Unload the crash kernel with "$KEXEC -p -u".
# Returns 0 when kexec succeeded, 1 otherwise; the outcome is reported on
# stdout either way.
function stop()
{
	if $KEXEC -p -u 2>/dev/null; then
		echo "kexec: unloaded kdump kernel"
		echo "Stopping kdump: [OK]"
		return 0
	else
		echo "kexec: failed to unloaded kdump kernel"
		echo "Stopping kdump: [FAILED]"
		return 1
	fi
}
# Nothing below can work without the kdump configuration file.
[ -f "$KDUMP_CONFIG_FILE" ] || {
	echo "Error: No kdump config file found!" >&2
	exit 1
}
# Dispatch on the sub-command given as $1.
#   start       save a pending vmcore and reboot when /proc/vmcore has
#               content, otherwise start the service
#   stop        unload the crash kernel
#   status      exit 0 when kdump is loaded, 3 when it is not
#   restart     stop, then start
#   condrestart accepted, does nothing
#   propagate   install the kdump ssh key on the dump target
# Anything else prints the usage text and exits 1.
main ()
{
	case "$1" in
	start)
		if [ -s /proc/vmcore ]; then
			save_core
			reboot
		else
			start
		fi
		;;
	stop)
		stop
		;;
	status)
		# any non-zero status counts as "not operational"
		if check_current_kdump_status; then
			echo "Kdump is operational"
			exit 0
		else
			echo "Kdump is not operational"
			exit 3
		fi
		;;
	restart)
		stop
		start
		;;
	condrestart)
		;;
	propagate)
		propagate_ssh_key
		;;
	*)
		echo $"Usage: $0 {start|stop|status|restart|propagate}"
		exit 1
	esac
}
# Other kdumpctl instances will block in queue, until this one exits
single_instance_lock

# To avoid fd 9 leaking, we invoke a subshell, close fd 9 and call main.
# So that fd isn't leaking when main is invoking a subshell.
(exec 9<&-; main "$1")

# Must directly follow the subshell so that its status is what we exit with.
exit $?