4e4a173698
Original patch is from Amerigo, but it has changed a lot: remove multi dump refreshed with latest git for the dump instruction function restructure fixed the behavior of do_kdump_pre, if kdump_pre fails it will reboot update the docs check the existance and executable of kdump_pre/post files, also check the timestamp of them for rebuilding. refresh patch, Address comments from vivek: s/hush/bash in docs fix the copy-paste error in kdump post error message s/reboot\/halt/reboot in kexec-kdump-howto.txt Signed-off-by: Dave Young <dyoung@redhat.com> Acked-by: Vivek Goyal <vgoyal@redhat.com>
468 lines
10 KiB
Bash
Executable File
468 lines
10 KiB
Bash
Executable File
#! /bin/sh
|
|
KEXEC=/sbin/kexec
|
|
|
|
KDUMP_KERNELVER=""
|
|
KDUMP_COMMANDLINE=""
|
|
KEXEC_ARGS=""
|
|
KDUMP_CONFIG_FILE="/etc/kdump.conf"
|
|
MKDUMPRD="/sbin/mkdumprd -f"
|
|
SAVE_PATH=/var/crash
|
|
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
|
|
DUMP_TARGET=""
|
|
|
|
LOGGER="/usr/bin/logger -p info -t kdump"
|
|
|
|
standard_kexec_args="-p"
|
|
|
|
if [ -f /etc/sysconfig/kdump ]; then
|
|
. /etc/sysconfig/kdump
|
|
fi
|
|
|
|
function save_core()
|
|
{
|
|
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
|
|
|
|
mkdir -p $coredir
|
|
cp --sparse=always /proc/vmcore $coredir/vmcore-incomplete
|
|
if [ $? == 0 ]; then
|
|
mv $coredir/vmcore-incomplete $coredir/vmcore
|
|
$LOGGER "saved a vmcore to $coredir"
|
|
else
|
|
$LOGGER "failed to save a vmcore to $coredir"
|
|
fi
|
|
|
|
# pass the dmesg to Abrt tool if exists, in order
|
|
# to collect the kernel oops message.
|
|
# https://fedorahosted.org/abrt/
|
|
if [ -x /usr/bin/dumpoops ]; then
|
|
makedumpfile --dump-dmesg $coredir/vmcore $coredir/dmesg >/dev/null 2>&1
|
|
dumpoops -d $coredir/dmesg >/dev/null 2>&1
|
|
if [ $? == 0 ]; then
|
|
$LOGGER "kernel oops has been collected by abrt tool"
|
|
fi
|
|
fi
|
|
}
|
|
|
|
function rebuild_initrd()
|
|
{
|
|
$MKDUMPRD $kdump_initrd $kdump_kver
|
|
if [ $? != 0 ]; then
|
|
echo "Failed to run mkdumprd"
|
|
$LOGGER "mkdumprd: failed to make kdump initrd"
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
#$1: the files to be checked with IFS=' '
|
|
function check_exist()
|
|
{
|
|
for file in $1; do
|
|
if [ ! -f "$file" ]; then
|
|
echo -n "Error: $file not found."; echo
|
|
return 1
|
|
fi
|
|
done
|
|
}
|
|
|
|
#$1: the files to be checked with IFS=' '
|
|
function check_executable()
|
|
{
|
|
for file in $1; do
|
|
if [ ! -x "$file" ]; then
|
|
echo -n "Error: $file is not executable."; echo
|
|
return 1
|
|
fi
|
|
done
|
|
}
|
|
|
|
function check_config()
|
|
{
|
|
local extra_modules modified_files=""
|
|
local force_rebuild=0
|
|
|
|
if [ -z "$KDUMP_KERNELVER" ]; then
|
|
kdump_kver=`uname -r`
|
|
else
|
|
kdump_kver=$KDUMP_KERNELVER
|
|
fi
|
|
|
|
kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
|
|
kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"
|
|
|
|
#will rebuild every time if extra_modules are specified
|
|
extra_modules=`grep ^extra_modules $KDUMP_CONFIG_FILE`
|
|
[ -n "$extra_modules" ] && force_rebuild=1
|
|
|
|
#check to see if dependent files has been modified
|
|
#since last build of the image file
|
|
if [ -f $kdump_initrd ]; then
|
|
image_time=`stat -c "%Y" $kdump_initrd 2>/dev/null`
|
|
else
|
|
image_time=0
|
|
fi
|
|
|
|
EXTRA_BINS=`grep ^kdump_post $KDUMP_CONFIG_FILE | cut -d\ -f2`
|
|
CHECK_FILES=`grep ^kdump_pre $KDUMP_CONFIG_FILE | cut -d\ -f2`
|
|
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
|
|
CHECK_FILES=`grep ^extra_bins $KDUMP_CONFIG_FILE | cut -d\ -f2-`
|
|
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
|
|
files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"
|
|
|
|
check_exist "$files" && check_executable "$EXTRA_BINS"
|
|
[ $? -ne 0 ] && return 1
|
|
|
|
for file in $files; do
|
|
time_stamp=`stat -c "%Y" $file`
|
|
if [ "$time_stamp" -gt "$image_time" ]; then
|
|
modified_files="$modified_files $file"
|
|
fi
|
|
done
|
|
|
|
if [ $image_time -eq 0 ]; then
|
|
echo -n "No kdump initial ramdisk found."; echo
|
|
elif [ "$force_rebuild" -ne 0 ]; then
|
|
echo -n "Force rebuild $kdump_initrd"; echo
|
|
elif [ -n "$modified_files" ]; then
|
|
echo "Detected change(s) the following file(s):"
|
|
echo -n " "; echo "$modified_files" | sed 's/\s/\n /g'
|
|
else
|
|
return 0
|
|
fi
|
|
|
|
echo "Rebuilding $kdump_initrd"
|
|
rebuild_initrd
|
|
return $?
|
|
}
|
|
|
|
# This function check iomem and determines if we have more than
|
|
# 4GB of ram available. Returns 1 if we do, 0 if we dont
|
|
function need_64bit_headers()
|
|
{
|
|
return `tail -n 1 /proc/iomem | awk '{ split ($1, r, "-"); \
|
|
print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }'`
|
|
}
|
|
|
|
# Load the kdump kerel specified in /etc/sysconfig/kdump
|
|
# If none is specified, try to load a kdump kernel with the same version
|
|
# as the currently running kernel.
|
|
function load_kdump()
|
|
{
|
|
|
|
if [ -z "$KDUMP_COMMANDLINE" ]
|
|
then
|
|
KDUMP_COMMANDLINE=`cat /proc/cmdline`
|
|
fi
|
|
|
|
ARCH=`uname -m`
|
|
if [ "$ARCH" == "ppc64" ]
|
|
then
|
|
MEM_RESERVED=`grep "crashkernel=[0-9]\+[MmKkGg]@[0-9]\+[MmGgKk]" /proc/cmdline`
|
|
if [ -z "$MEM_RESERVED" ]
|
|
then
|
|
MEM_RESERVED=`grep "crashkernel=[0-9]\+[MmKkGg]" /proc/cmdline`
|
|
fi
|
|
else
|
|
MEM_RESERVED=`grep "Crash kernel" /proc/iomem | grep -v "00000000-00000000"`
|
|
fi
|
|
|
|
if [ -z "$MEM_RESERVED" ]
|
|
then
|
|
$LOGGER "No crashkernel parameter specified for running kernel"
|
|
return 1
|
|
fi
|
|
|
|
if [ "$ARCH" == "i686" -o "$ARCH" == "i386" ]
|
|
then
|
|
|
|
need_64bit_headers
|
|
if [ $? == 1 ]
|
|
then
|
|
FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf32-core-headers`
|
|
if [ -n "$FOUND_ELF_ARGS" ]
|
|
then
|
|
echo -n "Warning: elf32-core-headers overrides correct elf64 setting"
|
|
echo
|
|
else
|
|
KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
|
|
fi
|
|
else
|
|
FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf64-core-headers`
|
|
if [ -z "$FOUND_ELF_ARGS" ]
|
|
then
|
|
KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
|
|
fi
|
|
fi
|
|
fi
|
|
|
|
KDUMP_COMMANDLINE=`echo $KDUMP_COMMANDLINE | sed -e 's/crashkernel=[^ ]*//'`
|
|
KDUMP_COMMANDLINE="${KDUMP_COMMANDLINE} ${KDUMP_COMMANDLINE_APPEND}"
|
|
|
|
$KEXEC $KEXEC_ARGS $standard_kexec_args \
|
|
--command-line="$KDUMP_COMMANDLINE" \
|
|
--initrd=$kdump_initrd $kdump_kernel 2>/dev/null
|
|
if [ $? == 0 ]; then
|
|
$LOGGER "kexec: loaded kdump kernel"
|
|
return 0
|
|
else
|
|
$LOGGER "kexec: failed to load kdump kernel"
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
function check_ssh_config()
|
|
{
|
|
while read config_opt config_val; do
|
|
case "$config_opt" in
|
|
sshkey)
|
|
if [ -f "$config_val" ]; then
|
|
# canonicalize the path
|
|
SSH_KEY_LOCATION=$(/usr/bin/readlink -m $config_val)
|
|
else
|
|
echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
|
|
fi
|
|
;;
|
|
path)
|
|
SAVE_PATH=$config_val
|
|
;;
|
|
net)
|
|
DUMP_TARGET=$config_val
|
|
;;
|
|
*)
|
|
;;
|
|
esac
|
|
done < $KDUMP_CONFIG_FILE
|
|
|
|
#make sure they've configured kdump.conf for ssh dumps
|
|
local SSH_TARGET=`echo -n $DUMP_TARGET | sed -n '/.*@/p'`
|
|
if [ -z "$SSH_TARGET" ]; then
|
|
return 1
|
|
fi
|
|
return 0
|
|
}
|
|
|
|
function check_ssh_target()
|
|
{
|
|
ssh -q -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH
|
|
if [ $? -ne 0 ]; then
|
|
echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"service kdump propagate\""
|
|
exit $?
|
|
fi
|
|
return 0
|
|
}
|
|
|
|
function propagate_ssh_key()
|
|
{
|
|
check_ssh_config
|
|
if [ $? -ne 0 ]; then
|
|
echo "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate"
|
|
$LOGGER "$errmsg, no ssh config specified in $KDUMP_CONFIG_FILE"
|
|
exit 1
|
|
fi
|
|
|
|
#Check if selinux is on... must flip to permissive mode
|
|
#for the moment to create key, then flip back...
|
|
se_enforce=`/usr/sbin/sestatus | grep -c "^Current mode.*enforcing"`
|
|
if [ "$se_enforce" -ge 1 ]; then
|
|
/usr/sbin/setenforce 0 2>&1 > /dev/null
|
|
fi
|
|
|
|
local KEYFILE=$SSH_KEY_LOCATION
|
|
local errmsg="Failed to propagate ssh key"
|
|
|
|
#Check to see if we already created key, if not, create it.
|
|
if [ -f $KEYFILE ]; then
|
|
echo "Using existing keys..."
|
|
else
|
|
echo -n "Generating new ssh keys... "
|
|
/usr/bin/ssh-keygen -t rsa -f $KEYFILE -N "" 2>&1 > /dev/null
|
|
echo "done."
|
|
fi
|
|
|
|
#If necessary, flip selinux back to enforcing
|
|
if [ "$se_enforce" -ge 1 ]; then
|
|
/usr/sbin/setenforce 1 2>&1 > /dev/null
|
|
fi
|
|
|
|
#now find the target ssh user and server to contact.
|
|
SSH_USER=`echo $DUMP_TARGET | cut -d\ -f2 | cut -d@ -f1`
|
|
SSH_SERVER=`echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/'`
|
|
|
|
#now send the found key to the found server
|
|
ssh-copy-id -i $KEYFILE $SSH_USER@$SSH_SERVER
|
|
RET=$?
|
|
if [ $RET == 0 ]; then
|
|
echo $KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER
|
|
$LOGGER "propagated ssh key (ssh server: $SSH_SERVER)"
|
|
return 0
|
|
else
|
|
echo $KEYFILE failed in transfer to $SSH_SERVER
|
|
$LOGGER "$errmsg, unable to transfer $KEYFILE to $SSH_SERVER"
|
|
exit 1
|
|
fi
|
|
|
|
}
|
|
|
|
function status()
|
|
{
|
|
if [ ! -e /sys/kernel/kexec_crash_loaded ]
|
|
then
|
|
return 2
|
|
fi
|
|
rc=`cat /sys/kernel/kexec_crash_loaded`
|
|
if [ $rc == 1 ]; then
|
|
return 0
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
function save_raw()
|
|
{
|
|
local kdump_dir
|
|
local raw_target
|
|
|
|
raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
|
|
[ -z "$raw_target" ] && return 0
|
|
[ -b "$raw_target" ] || {
|
|
echo "raw partition $raw_target not found"
|
|
return 1
|
|
}
|
|
kdump_dir=`grep ^path $KDUMP_CONFIG_FILE | cut -d' ' -f2-`
|
|
if [ -z "${kdump_dir}" ]; then
|
|
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
|
|
else
|
|
coredir="${kdump_dir}/`date +"%Y-%m-%d-%H:%M"`"
|
|
fi
|
|
|
|
mkdir -p "$coredir"
|
|
[ -d "$coredir" ] || {
|
|
echo "failed to create $coredir"
|
|
return 1
|
|
}
|
|
if makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
|
|
# dump found
|
|
echo "Dump saved to $coredir/vmcore"
|
|
# wipe makedumpfile header
|
|
dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
|
|
else
|
|
rm -rf "$coredir"
|
|
fi
|
|
|
|
return 0
|
|
}
|
|
|
|
function start()
|
|
{
|
|
local nr
|
|
nr=$(awk 'BEGIN{cnt=0} /^raw|^net|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' $KDUMP_CONFIG_FILE)
|
|
[ $nr -gt 1 ] && {
|
|
echo -n "Error: More than one dump targets specified"; echo
|
|
return 1
|
|
}
|
|
|
|
save_raw
|
|
if [ $? -ne 0 ]; then
|
|
echo -n "Starting kdump:"; echo
|
|
$LOGGER "failed to start up"
|
|
return 1
|
|
fi
|
|
|
|
status
|
|
rc=$?
|
|
if [ $rc == 2 ]; then
|
|
echo -n "Kdump is not supported on this kernel"; echo
|
|
return 1;
|
|
else
|
|
if [ $rc == 0 ]; then
|
|
echo -n "Kdump already running"; echo
|
|
return 0
|
|
fi
|
|
fi
|
|
|
|
check_ssh_config && check_ssh_target
|
|
|
|
check_config
|
|
if [ $? != 0 ]; then
|
|
echo -n "Starting kdump:"; echo
|
|
$LOGGER "failed to start up, config file incorrect"
|
|
return 1
|
|
fi
|
|
load_kdump
|
|
if [ $? != 0 ]; then
|
|
echo -n "Starting kdump:"; echo
|
|
$LOGGER "failed to start up"
|
|
return 1
|
|
fi
|
|
|
|
echo -n "Starting kdump:"; echo
|
|
$LOGGER "started up"
|
|
}
|
|
|
|
function stop()
|
|
{
|
|
$KEXEC -p -u 2>/dev/null
|
|
if [ $? == 0 ]; then
|
|
$LOGGER "kexec: unloaded kdump kernel"
|
|
echo -n "Stopping kdump:"; echo
|
|
$LOGGER "stopped"
|
|
return 0
|
|
else
|
|
$LOGGER "kexec: failed to unload kdump kernel"
|
|
echo -n "Stopping kdump:"; echo
|
|
$LOGGER "failed to stop"
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
|
|
echo -n "No kdump config file found!"; echo
|
|
exit 1
|
|
fi
|
|
|
|
case "$1" in
|
|
start)
|
|
if [ -s /proc/vmcore ]; then
|
|
save_core
|
|
reboot
|
|
else
|
|
start
|
|
fi
|
|
;;
|
|
stop)
|
|
stop
|
|
;;
|
|
status)
|
|
EXIT_CODE=0
|
|
status
|
|
case "$?" in
|
|
0)
|
|
echo "Kdump is operational"
|
|
EXIT_CODE=0
|
|
;;
|
|
1)
|
|
echo "Kdump is not operational"
|
|
EXIT_CODE=3
|
|
;;
|
|
2)
|
|
echo "Kdump is unsupported on this kernel"
|
|
EXIT_CODE=3
|
|
;;
|
|
esac
|
|
exit $EXIT_CODE
|
|
;;
|
|
restart)
|
|
stop
|
|
start
|
|
;;
|
|
condrestart)
|
|
;;
|
|
propagate)
|
|
propagate_ssh_key
|
|
;;
|
|
*)
|
|
echo $"Usage: $0 {start|stop|status|restart|propagate}"
|
|
exit 1
|
|
esac
|
|
|
|
exit $?
|