#!/bin/bash
#
# kdumpctl - load, unload and query the kdump crash kernel, and save a
# vmcore after a crash.  The action to perform is taken from $1 by the
# case statement at the end of this file.
#
# bash is required: the functions below rely on bash features such as
# `local`.

# kexec binary used to load and unload the crash kernel.
KEXEC=/sbin/kexec
# Kernel version for the crash kernel; when empty, check_config falls
# back to `uname -r`.
KDUMP_KERNELVER=""
# Crash kernel command line; when empty, load_kdump reads /proc/cmdline.
KDUMP_COMMANDLINE=""
# Extra arguments passed to kexec by load_kdump.
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
# Command (with options) that builds the kdump initrd.  It is expanded
# unquoted so that "-f" is passed as a separate argument.
MKDUMPRD="/sbin/mkdumprd -f"
# Defaults that check_ssh_config may override from $KDUMP_CONFIG_FILE.
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DUMP_TARGET=""
# Logging command; invoked as: $LOGGER "message".
LOGGER="/usr/bin/logger -p info -t kdump"
# Arguments always passed to kexec when loading the crash kernel.
standard_kexec_args="-p"

# Site-specific settings, when present, override the defaults above.
if [ -f /etc/sysconfig/kdump ]; then
	. /etc/sysconfig/kdump
fi
# Copy /proc/vmcore into a timestamped directory under /var/crash and
# log the outcome through $LOGGER.  The copy is written under a
# temporary "-incomplete" name and renamed only once cp has succeeded.
# Sets the global $coredir to the directory that was used.
save_core()
{
	coredir="/var/crash/$(date +"%Y-%m-%d-%H:%M")"
	mkdir -p "$coredir"

	if cp --sparse=always /proc/vmcore "$coredir/vmcore-incomplete"; then
		mv "$coredir/vmcore-incomplete" "$coredir/vmcore"
		$LOGGER "saved a vmcore to $coredir"
	else
		$LOGGER "failed to save a vmcore to $coredir"
	fi

	# When the Abrt helper is installed, hand it the kernel log extracted
	# from the dump so that it can collect the oops message.
	# https://fedorahosted.org/abrt/
	if [ -x /usr/bin/dumpoops ]; then
		makedumpfile --dump-dmesg "$coredir/vmcore" "$coredir/dmesg" >/dev/null 2>&1
		if dumpoops -d "$coredir/dmesg" >/dev/null 2>&1; then
			$LOGGER "kernel oops has been collected by abrt tool"
		fi
	fi
}
# Build the kdump initrd by running $MKDUMPRD on $kdump_initrd for
# kernel version $kdump_kver.
# Returns 0 when the build command succeeds; otherwise prints and logs
# an error and returns 1.
rebuild_initrd()
{
	# $MKDUMPRD is left unquoted on purpose: it carries the command and
	# its options in a single string.
	if $MKDUMPRD "$kdump_initrd" "$kdump_kver"; then
		return 0
	fi

	echo "Failed to run mkdumprd"
	$LOGGER "mkdumprd: failed to make kdump initrd"
	return 1
}
# Verify that every path in a whitespace-separated list is a regular file.
# $1: the files to be checked, separated by IFS characters
# Returns 0 when all of them exist; on the first missing one prints an
# error and returns 1.
check_exist()
{
	local candidate

	# $1 is deliberately unquoted so that it splits into separate paths.
	for candidate in $1; do
		[ -f "$candidate" ] && continue
		echo "Error: $candidate not found."
		return 1
	done
	return 0
}
# Verify that every path in a whitespace-separated list is executable.
# $1: the files to be checked, separated by IFS characters
# Returns 0 when all of them are executable; on the first one that is
# not, prints an error and returns 1.
check_executable()
{
	local candidate

	# $1 is deliberately unquoted so that it splits into separate paths.
	for candidate in $1; do
		[ -x "$candidate" ] && continue
		echo "Error: $candidate is not executable."
		return 1
	done
	return 0
}
# Work out which kernel and initrd kdump should use and rebuild the
# initrd when it is missing or out of date.
#
# Reads:  KDUMP_KERNELVER, KDUMP_BOOTDIR, KDUMP_IMG, KDUMP_IMG_EXT,
#         KDUMP_CONFIG_FILE
# Sets:   kdump_kver, kdump_kernel, kdump_initrd (used by rebuild_initrd
#         and load_kdump) and EXTRA_BINS
# Returns 0 when the initrd is up to date, 1 when a required file is
# missing or not executable, otherwise the status of rebuild_initrd.
check_config()
{
	local extra_modules modified_files=""
	local force_rebuild=0
	local image_time time_stamp files file

	if [ -z "$KDUMP_KERNELVER" ]; then
		kdump_kver=$(uname -r)
	else
		kdump_kver=$KDUMP_KERNELVER
	fi
	kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
	kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"

	# will rebuild every time if extra_modules are specified
	extra_modules=$(grep ^extra_modules "$KDUMP_CONFIG_FILE")
	[ -n "$extra_modules" ] && force_rebuild=1

	# check to see if dependent files have been modified
	# since the last build of the image file
	image_time=0
	if [ -f "$kdump_initrd" ]; then
		image_time=$(stat -c "%Y" "$kdump_initrd" 2>/dev/null)
		# a failed stat leaves nothing to compare against; treat the
		# image as missing instead of feeding "" to the numeric tests
		[ -n "$image_time" ] || image_time=0
	fi

	# everything after the first space on an extra_bins line is a path
	EXTRA_BINS=$(grep ^extra_bins "$KDUMP_CONFIG_FILE" | cut -d' ' -f2-)
	files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"
	if ! check_exist "$files" || ! check_executable "$EXTRA_BINS"; then
		return 1
	fi

	# $files is deliberately unquoted so that it splits into paths
	for file in $files; do
		time_stamp=$(stat -c "%Y" "$file")
		if [ "$time_stamp" -gt "$image_time" ]; then
			modified_files="$modified_files $file"
		fi
	done

	if [ "$image_time" -eq 0 ]; then
		echo "No kdump initial ramdisk found."
	elif [ "$force_rebuild" -ne 0 ]; then
		echo "Force rebuild $kdump_initrd"
	elif [ -n "$modified_files" ]; then
		echo "Detected change(s) the following file(s):"
		echo -n " "; echo "$modified_files" | sed 's/\s/\n /g'
	else
		return 0
	fi

	echo "Rebuilding $kdump_initrd"
	rebuild_initrd
}
# This function checks iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we don't.
#
# Only the end address of the last /proc/iomem line is examined.  An
# address is above 0xffffffff exactly when it has more than 8
# significant hex digits, so no arithmetic (and no gawk-only strtonum)
# is needed and the address cannot overflow.  Missing or malformed
# input is treated as "not above 4GB".
need_64bit_headers()
{
	local last_line range_end

	last_line=$(tail -n 1 /proc/iomem)

	# "  start-end : name"  ->  "end"
	range_end=${last_line%% :*}
	range_end=${range_end##*-}

	case "$range_end" in
	'' | *[!0-9a-fA-F]*)
		return 0
		;;
	esac

	# drop leading zeros so that only significant digits are counted
	range_end=${range_end#"${range_end%%[!0]*}"}

	if [ "${#range_end}" -gt 8 ]; then
		return 1
	fi
	return 0
}
# Load the kdump kernel specified in /etc/sysconfig/kdump.
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
#
# Reads $kdump_kernel and $kdump_initrd, and updates KDUMP_COMMANDLINE
# and KEXEC_ARGS in place.
# Returns 0 when kexec loaded the kernel; returns 1 when no crash memory
# is reserved or kexec fails.
load_kdump()
{
	local headers_rc

	[ -n "$KDUMP_COMMANDLINE" ] || KDUMP_COMMANDLINE=$(cat /proc/cmdline)

	# Find out whether memory was reserved for the crash kernel: on ppc64
	# from the boot command line, elsewhere from /proc/iomem.
	ARCH=$(uname -m)
	case "$ARCH" in
	ppc64)
		MEM_RESERVED=$(grep "crashkernel=[0-9]\+[MmKkGg]@[0-9]\+[MmGgKk]" /proc/cmdline)
		[ -n "$MEM_RESERVED" ] ||
			MEM_RESERVED=$(grep "crashkernel=[0-9]\+[MmKkGg]" /proc/cmdline)
		;;
	*)
		MEM_RESERVED=$(grep "Crash kernel" /proc/iomem | grep -v "00000000-00000000")
		;;
	esac

	if [ -z "$MEM_RESERVED" ]; then
		$LOGGER "No crashkernel parameter specified for running kernel"
		return 1
	fi

	# 32-bit x86 only: choose the core header format unless the
	# administrator already put one into KEXEC_ARGS.
	case "$ARCH" in
	i686 | i386)
		need_64bit_headers
		headers_rc=$?
		if [ "$headers_rc" -eq 1 ]; then
			FOUND_ELF_ARGS=$(echo $KEXEC_ARGS | grep elf32-core-headers)
			if [ -z "$FOUND_ELF_ARGS" ]; then
				KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
			else
				echo "Warning: elf32-core-headers overrides correct elf64 setting"
			fi
		else
			FOUND_ELF_ARGS=$(echo $KEXEC_ARGS | grep elf64-core-headers)
			[ -n "$FOUND_ELF_ARGS" ] ||
				KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
		fi
		;;
	esac

	# The crash kernel's command line has the first crashkernel= option
	# removed and KDUMP_COMMANDLINE_APPEND added at the end.
	KDUMP_COMMANDLINE=$(echo $KDUMP_COMMANDLINE | sed -e 's/crashkernel=[^ ]*//')
	KDUMP_COMMANDLINE="${KDUMP_COMMANDLINE} ${KDUMP_COMMANDLINE_APPEND}"

	if $KEXEC $KEXEC_ARGS $standard_kexec_args \
		--command-line="$KDUMP_COMMANDLINE" \
		--initrd="$kdump_initrd" "$kdump_kernel" 2>/dev/null
	then
		$LOGGER "kexec: loaded kdump kernel"
		return 0
	fi

	$LOGGER "kexec: failed to load kdump kernel"
	return 1
}
# Read the ssh-related directives from $KDUMP_CONFIG_FILE.
#
#   sshkey <file>  sets SSH_KEY_LOCATION (canonicalized) when <file>
#                  exists, otherwise warns and keeps the current value
#   path <dir>     sets SAVE_PATH
#   net <target>   sets DUMP_TARGET
#
# Returns 0 when DUMP_TARGET contains an "@" (i.e. looks like
# user@host), 1 otherwise.
check_ssh_config()
{
	local config_opt config_val

	# -r keeps backslashes literal; the "|| [ -n ... ]" part makes sure
	# a final line without a trailing newline is still processed.
	while read -r config_opt config_val || [ -n "$config_opt" ]; do
		case "$config_opt" in
		sshkey)
			if [ -f "$config_val" ]; then
				# canonicalize the path
				SSH_KEY_LOCATION=$(/usr/bin/readlink -m "$config_val")
			else
				echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
			fi
			;;
		path)
			SAVE_PATH=$config_val
			;;
		net)
			DUMP_TARGET=$config_val
			;;
		*)
			;;
		esac
	done < "$KDUMP_CONFIG_FILE"

	# make sure they've configured kdump.conf for ssh dumps
	case "$DUMP_TARGET" in
	*@*)
		return 0
		;;
	esac
	return 1
}
# Make sure $SAVE_PATH exists on the ssh dump target by creating it over
# a non-interactive (BatchMode) ssh connection that uses
# $SSH_KEY_LOCATION.
# Returns 0 on success.  On failure prints a hint and exits the script
# with ssh's exit status.
check_ssh_target()
{
	local rc

	ssh -q -i "$SSH_KEY_LOCATION" -o BatchMode=yes "$DUMP_TARGET" mkdir -p "$SAVE_PATH"
	# capture the status now: any later command would overwrite $?
	rc=$?
	if [ "$rc" -ne 0 ]; then
		echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"service kdump propagate\""
		exit "$rc"
	fi
	return 0
}
# Install the kdump ssh public key on the dump target so that later
# connections can be made without a password.
#
# Generates the key pair at $SSH_KEY_LOCATION when it does not exist
# yet, then runs ssh-copy-id for the user and server taken from
# $DUMP_TARGET.  Sets SSH_USER and SSH_SERVER.
# Returns 0 on success; exits the script with status 1 when no ssh
# target is configured, key generation fails or the transfer fails.
propagate_ssh_key()
{
	# defined first so that every failure path below can use it
	local errmsg="Failed to propagate ssh key"
	local KEYFILE se_enforce RET
	local keygen_rc=0

	if ! check_ssh_config; then
		echo "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate"
		$LOGGER "$errmsg, no ssh config specified in $KDUMP_CONFIG_FILE"
		exit 1
	fi

	# Check if selinux is on... must flip to permissive mode
	# for the moment to create key, then flip back...
	se_enforce=$(/usr/sbin/sestatus 2>/dev/null | grep -c "^Current mode.*enforcing")
	if [ "$se_enforce" -ge 1 ]; then
		/usr/sbin/setenforce 0 >/dev/null 2>&1
	fi

	KEYFILE=$SSH_KEY_LOCATION

	# Check to see if we already created key, if not, create it.
	if [ -f "$KEYFILE" ]; then
		echo "Using existing keys..."
	else
		echo -n "Generating new ssh keys... "
		if /usr/bin/ssh-keygen -t rsa -f "$KEYFILE" -N "" >/dev/null 2>&1; then
			echo "done."
		else
			keygen_rc=$?
			echo "failed."
		fi
	fi

	# If necessary, flip selinux back to enforcing.  This happens before
	# any error exit so that the system is never left permissive.
	if [ "$se_enforce" -ge 1 ]; then
		/usr/sbin/setenforce 1 >/dev/null 2>&1
	fi

	if [ "$keygen_rc" -ne 0 ]; then
		$LOGGER "$errmsg, unable to generate $KEYFILE"
		exit 1
	fi

	# now find the target ssh user and server to contact:
	# user is the text before the first "@", server the text after the
	# last one
	SSH_USER=${DUMP_TARGET%%@*}
	SSH_SERVER=${DUMP_TARGET##*@}

	# now send the found key to the found server
	ssh-copy-id -i "$KEYFILE" "$SSH_USER@$SSH_SERVER"
	RET=$?
	if [ "$RET" -eq 0 ]; then
		echo "$KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER"
		$LOGGER "propagated ssh key (ssh server: $SSH_SERVER)"
		return 0
	fi

	echo "$KEYFILE failed in transfer to $SSH_SERVER"
	$LOGGER "$errmsg, unable to transfer $KEYFILE to $SSH_SERVER"
	exit 1
}
# Report whether a crash kernel is currently loaded, based on
# /sys/kernel/kexec_crash_loaded.  Stores the file's content in the
# global $rc.
# Returns 0 when the file contains 1, 1 when it contains anything else,
# and 2 when the file does not exist.
status()
{
	[ -e /sys/kernel/kexec_crash_loaded ] || return 2

	rc=$(cat /sys/kernel/kexec_crash_loaded)
	case "$rc" in
	1)
		return 0
		;;
	*)
		return 1
		;;
	esac
}
# Recover a dump that was written to the raw partition named by the
# "raw" directive in $KDUMP_CONFIG_FILE.
#
# The dump is reassembled with "makedumpfile -R" into a timestamped
# directory below the configured "path" (default /var/crash).  After a
# successful recovery the first block of the partition is zeroed; when
# makedumpfile fails the directory is removed again.
# Sets the global $coredir.
# Returns 1 when the raw target is not a block device or the directory
# cannot be created, 0 otherwise (including when no raw target is
# configured).
save_raw()
{
	local kdump_dir
	local raw_target

	raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' "$KDUMP_CONFIG_FILE")
	if [ -z "$raw_target" ]; then
		return 0
	fi
	if [ ! -b "$raw_target" ]; then
		echo "raw partition $raw_target not found"
		return 1
	fi

	kdump_dir=$(grep ^path "$KDUMP_CONFIG_FILE" | cut -d' ' -f2-)
	coredir="${kdump_dir:-/var/crash}/$(date +"%Y-%m-%d-%H:%M")"

	mkdir -p "$coredir"
	if [ ! -d "$coredir" ]; then
		echo "failed to create $coredir"
		return 1
	fi

	if makedumpfile -R "$coredir/vmcore" <"$raw_target" >/dev/null 2>&1; then
		# dump found
		echo "Dump saved to $coredir/vmcore"
		# wipe makedumpfile header
		dd if=/dev/zero of="$raw_target" bs=1b count=1 2>/dev/null
	else
		rm -rf "$coredir"
	fi
	return 0
}
# Start kdump: recover any dump left on a raw partition, then validate
# the configuration and load the crash kernel.
#
# Returns 0 when the crash kernel was loaded or was already loaded;
# returns 1 when more than one dump target is configured, recovering a
# raw dump fails, the kernel lacks kdump support, the configuration
# check fails or the crash kernel cannot be loaded.
start()
{
	local nr state

	# count the dump target directives in the configuration file
	nr=$(awk 'BEGIN{cnt=0} /^raw|^net|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' "$KDUMP_CONFIG_FILE")
	if [ "$nr" -gt 1 ]; then
		echo "Error: More than one dump targets specified"
		return 1
	fi

	if ! save_raw; then
		echo "Starting kdump:"
		$LOGGER "failed to start up"
		return 1
	fi

	status
	state=$?
	case "$state" in
	2)
		echo "Kdump is not supported on this kernel"
		return 1
		;;
	0)
		echo "Kdump already running"
		return 0
		;;
	esac

	# The target is only probed when an ssh target is configured.
	# check_ssh_target exits the script itself when it is unreachable.
	check_ssh_config && check_ssh_target

	if ! check_config; then
		echo "Starting kdump:"
		$LOGGER "failed to start up, config file incorrect"
		return 1
	fi

	if ! load_kdump; then
		echo "Starting kdump:"
		$LOGGER "failed to start up"
		return 1
	fi

	echo "Starting kdump:"
	$LOGGER "started up"
	# success must not depend on whether the logger call worked
	return 0
}
# Unload the crash kernel by running "$KEXEC -p -u" and log the result.
# Returns 0 when the command succeeds, 1 otherwise.
stop()
{
	if ! $KEXEC -p -u 2>/dev/null; then
		$LOGGER "kexec: failed to unload kdump kernel"
		echo "Stopping kdump:"
		$LOGGER "failed to stop"
		return 1
	fi

	$LOGGER "kexec: unloaded kdump kernel"
	echo "Stopping kdump:"
	$LOGGER "stopped"
	return 0
}
# Entry point.  Nothing can be done without the configuration file.
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
	echo "No kdump config file found!"
	exit 1
fi

# Dispatch on the requested action.
case "$1" in
start)
	# A non-empty /proc/vmcore means there is a dump to save: save it
	# and reboot instead of loading a crash kernel.
	if [ -s /proc/vmcore ]; then
		save_core
		reboot
	else
		start
	fi
	;;
stop)
	stop
	;;
status)
	# status returns 0/1/2; translate that into a message and into the
	# exit code of the script (0 when operational, 3 otherwise).
	EXIT_CODE=0
	status
	case "$?" in
	0)
		echo "Kdump is operational"
		EXIT_CODE=0
		;;
	1)
		echo "Kdump is not operational"
		EXIT_CODE=3
		;;
	2)
		echo "Kdump is unsupported on this kernel"
		EXIT_CODE=3
		;;
	esac
	exit $EXIT_CODE
	;;
restart)
	stop
	start
	;;
condrestart)
	# accepted, but does nothing
	;;
propagate)
	propagate_ssh_key
	;;
*)
	echo "Usage: $0 {start|stop|status|restart|propagate}"
	exit 1
	;;
esac

# Exit with the status of the action that was run.
exit $?