kexec-tools/kdumpctl
WANG Chao a6f03150e9 kdumpctl: Run multiple kdumpctl instances one by one in serial order
There will be a race condition if multiple kdumpctl instances are
running at the same time.

By introducing a global mutex lock, only one instance can acquire this
lock and run, others will be waiting for the lock in queue. Now each
kdump instance will be run in serial order and there won't any race
condition.

This is a patch backported from RHEL6.

Signed-off-by: WANG Chao <chaowang@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
2013-09-27 10:07:13 +08:00

589 lines
13 KiB
Bash
Executable File

#! /bin/sh
KEXEC=/sbin/kexec
KDUMP_KERNELVER=""
KDUMP_COMMANDLINE=""
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
MKDUMPRD="/sbin/mkdumprd -f"
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DUMP_TARGET=""
standard_kexec_args="-p"
if [ -f /etc/sysconfig/kdump ]; then
. /etc/sysconfig/kdump
fi
single_instance_lock()
{
exec 9>/var/lock/kdump
flock 9
}
# remove_cmdline_param <kernel cmdline> <param1> [<param2>] ... [<paramN>]
# Remove a list of kernel parameters from a given kernel cmdline and print the result.
# For each "arg" in the removing params list, "arg" and "arg=xxx" will be removed if exists.
function remove_cmdline_param()
{
local cmdline=$1
shift
for arg in $@; do
cmdline=`echo $cmdline | \
sed -e "s/\b$arg=[^ ]*\b//g" \
-e "s/\b$arg\b//g" \
-e "s/\s\+/ /g"`
done
echo $cmdline
}
function save_core()
{
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
mkdir -p $coredir
cp --sparse=always /proc/vmcore $coredir/vmcore-incomplete
if [ $? == 0 ]; then
mv $coredir/vmcore-incomplete $coredir/vmcore
echo "saved a vmcore to $coredir"
else
echo "failed to save a vmcore to $coredir" >&2
fi
# pass the dmesg to Abrt tool if exists, in order
# to collect the kernel oops message.
# https://fedorahosted.org/abrt/
if [ -x /usr/bin/dumpoops ]; then
makedumpfile --dump-dmesg $coredir/vmcore $coredir/dmesg >/dev/null 2>&1
dumpoops -d $coredir/dmesg >/dev/null 2>&1
if [ $? == 0 ]; then
echo "kernel oops has been collected by abrt tool"
fi
fi
}
function rebuild_initrd()
{
$MKDUMPRD $kdump_initrd $kdump_kver
if [ $? != 0 ]; then
echo "mkdumprd: failed to make kdump initrd" >&2
return 1
fi
}
#$1: the files to be checked with IFS=' '
function check_exist()
{
for file in $1; do
if [ ! -f "$file" ]; then
echo -n "Error: $file not found."; echo
return 1
fi
done
}
#$1: the files to be checked with IFS=' '
function check_executable()
{
for file in $1; do
if [ ! -x "$file" ]; then
echo -n "Error: $file is not executable."; echo
return 1
fi
done
}
function check_config()
{
local nr
nr=$(awk 'BEGIN{cnt=0} /^raw|^ssh[[:blank:]]|^nfs|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' $KDUMP_CONFIG_FILE)
[ $nr -gt 1 ] && {
echo "More than one dump targets specified."
return 1
}
while read config_opt config_val; do
case "$config_opt" in
\#* | "")
;;
raw|ext2|ext3|ext4|minix|btrfs|xfs|nfs|ssh|sshkey|path|core_collector|kdump_post|kdump_pre|extra_bins|extra_modules|default|force_rebuild|dracut_args)
[ -z "$config_val" ] && {
echo "Invalid kdump config value for option $config_opt."
return 1;
}
;;
net|options|link_delay|disk_timeout|debug_mem_level|blacklist)
echo "Deprecated kdump config option: $config_opt. Refer to kdump.conf manpage for alternatives."
return 1
;;
*)
echo "Invalid kdump config option $config_opt"
return 1;
;;
esac
done < $KDUMP_CONFIG_FILE
return 0
}
function check_rebuild()
{
local extra_modules modified_files=""
local _force_rebuild force_rebuild="0"
if [ -z "$KDUMP_KERNELVER" ]; then
kdump_kver=`uname -r`
else
kdump_kver=$KDUMP_KERNELVER
fi
kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"
_force_rebuild=`grep ^force_rebuild $KDUMP_CONFIG_FILE 2>/dev/null`
if [ $? -eq 0 ]; then
force_rebuild=`echo $_force_rebuild | cut -d' ' -f2`
if [ "$force_rebuild" != "0" ] && [ "$force_rebuild" != "1" ];then
echo "Error: force_rebuild value is invalid"
return 1
fi
fi
#will rebuild every time if extra_modules are specified
extra_modules=`grep ^extra_modules $KDUMP_CONFIG_FILE`
[ -n "$extra_modules" ] && force_rebuild="1"
#check to see if dependent files has been modified
#since last build of the image file
if [ -f $kdump_initrd ]; then
image_time=`stat -c "%Y" $kdump_initrd 2>/dev/null`
else
image_time=0
fi
EXTRA_BINS=`grep ^kdump_post $KDUMP_CONFIG_FILE | cut -d\ -f2`
CHECK_FILES=`grep ^kdump_pre $KDUMP_CONFIG_FILE | cut -d\ -f2`
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
CHECK_FILES=`grep ^extra_bins $KDUMP_CONFIG_FILE | cut -d\ -f2-`
EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"
check_exist "$files" && check_executable "$EXTRA_BINS"
[ $? -ne 0 ] && return 1
for file in $files; do
time_stamp=`stat -c "%Y" $file`
if [ "$time_stamp" -gt "$image_time" ]; then
modified_files="$modified_files $file"
fi
done
if [ $image_time -eq 0 ]; then
echo -n "No kdump initial ramdisk found."; echo
elif [ "$force_rebuild" != "0" ]; then
echo -n "Force rebuild $kdump_initrd"; echo
elif [ -n "$modified_files" ]; then
echo "Detected change(s) the following file(s):"
echo -n " "; echo "$modified_files" | sed 's/\s/\n /g'
else
return 0
fi
echo "Rebuilding $kdump_initrd"
rebuild_initrd
return $?
}
# This function check iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we dont
function need_64bit_headers()
{
return `tail -n 1 /proc/iomem | awk '{ split ($1, r, "-"); \
print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }'`
}
# Load the kdump kerel specified in /etc/sysconfig/kdump
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
function load_kdump()
{
MEM_RESERVED=$(cat /sys/kernel/kexec_crash_size)
if [ $MEM_RESERVED -eq 0 ]
then
echo "No memory reserved for crash kernel." >&2
return 1
fi
ARCH=`uname -m`
if [ "$ARCH" == "i686" -o "$ARCH" == "i386" ]
then
need_64bit_headers
if [ $? == 1 ]
then
FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf32-core-headers`
if [ -n "$FOUND_ELF_ARGS" ]
then
echo -n "Warning: elf32-core-headers overrides correct elf64 setting"
echo
else
KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
fi
else
FOUND_ELF_ARGS=`echo $KEXEC_ARGS | grep elf64-core-headers`
if [ -z "$FOUND_ELF_ARGS" ]
then
KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
fi
fi
fi
if [ -z "$KDUMP_COMMANDLINE" ]
then
KDUMP_COMMANDLINE=`cat /proc/cmdline`
fi
KDUMP_COMMANDLINE=`remove_cmdline_param "$KDUMP_COMMANDLINE" crashkernel hugepages hugepagesz`
KDUMP_COMMANDLINE="${KDUMP_COMMANDLINE} ${KDUMP_COMMANDLINE_APPEND}"
$KEXEC $KEXEC_ARGS $standard_kexec_args \
--command-line="$KDUMP_COMMANDLINE" \
--initrd=$kdump_initrd $kdump_kernel 2>/dev/null
if [ $? == 0 ]; then
echo "kexec: loaded kdump kernel"
return 0
else
echo "kexec: failed to load kdump kernel" >&2
return 1
fi
}
function check_ssh_config()
{
while read config_opt config_val; do
case "$config_opt" in
sshkey)
if [ -f "$config_val" ]; then
# canonicalize the path
SSH_KEY_LOCATION=$(/usr/bin/readlink -m $config_val)
else
echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
fi
;;
path)
SAVE_PATH=$config_val
;;
ssh)
DUMP_TARGET=$config_val
;;
*)
;;
esac
done < $KDUMP_CONFIG_FILE
#make sure they've configured kdump.conf for ssh dumps
local SSH_TARGET=`echo -n $DUMP_TARGET | sed -n '/.*@/p'`
if [ -z "$SSH_TARGET" ]; then
return 1
fi
return 0
}
function check_ssh_target()
{
local _ret
ssh -q -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH
_ret=$?
if [ $_ret -ne 0 ]; then
echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"kdumpctl propagate\"" >&2
return 1
fi
return 0
}
function propagate_ssh_key()
{
check_ssh_config
if [ $? -ne 0 ]; then
echo "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate" >&2
exit 1
fi
#Check if selinux is on... must flip to permissive mode
#for the moment to create key, then flip back...
se_enforce=`/usr/sbin/sestatus | grep -c "^Current mode.*enforcing"`
if [ "$se_enforce" -ge 1 ]; then
/usr/sbin/setenforce 0 2>&1 > /dev/null
fi
local KEYFILE=$SSH_KEY_LOCATION
local errmsg="Failed to propagate ssh key"
#Check to see if we already created key, if not, create it.
if [ -f $KEYFILE ]; then
echo "Using existing keys..."
else
echo -n "Generating new ssh keys... "
/usr/bin/ssh-keygen -t rsa -f $KEYFILE -N "" 2>&1 > /dev/null
echo "done."
fi
#If necessary, flip selinux back to enforcing
if [ "$se_enforce" -ge 1 ]; then
/usr/sbin/setenforce 1 2>&1 > /dev/null
fi
#now find the target ssh user and server to contact.
SSH_USER=`echo $DUMP_TARGET | cut -d\ -f2 | cut -d@ -f1`
SSH_SERVER=`echo $DUMP_TARGET | sed -e's/\(.*@\)\(.*$\)/\2/'`
#now send the found key to the found server
ssh-copy-id -i $KEYFILE $SSH_USER@$SSH_SERVER
RET=$?
if [ $RET == 0 ]; then
echo $KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER
return 0
else
echo $errmsg, $KEYFILE failed in transfer to $SSH_SERVER >&2
exit 1
fi
}
function status()
{
if [ ! -e /sys/kernel/kexec_crash_loaded ]
then
return 2
fi
rc=`cat /sys/kernel/kexec_crash_loaded`
if [ $rc == 1 ]; then
return 0
else
return 1
fi
}
function save_raw()
{
local kdump_dir
local raw_target
raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
[ -z "$raw_target" ] && return 0
[ -b "$raw_target" ] || {
echo "raw partition $raw_target not found"
return 1
}
kdump_dir=`grep ^path $KDUMP_CONFIG_FILE | cut -d' ' -f2-`
if [ -z "${kdump_dir}" ]; then
coredir="/var/crash/`date +"%Y-%m-%d-%H:%M"`"
else
coredir="${kdump_dir}/`date +"%Y-%m-%d-%H:%M"`"
fi
mkdir -p "$coredir"
[ -d "$coredir" ] || {
echo "failed to create $coredir"
return 1
}
if makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
# dump found
echo "Dump saved to $coredir/vmcore"
# wipe makedumpfile header
dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
else
rm -rf "$coredir"
fi
return 0
}
get_save_path() {
local _save_path=$(grep "^path" /etc/kdump.conf|awk '{print $2}')
if [ -z "$_save_path" ]; then
_save_path="/var/crash"
fi
echo $_save_path
}
is_dump_target_configured() {
local _target
_target=$(egrep "^ext[234]|^xfs|^btrfs|^minix|^raw|^ssh|^nfs" /etc/kdump.conf)
[ -n "$_target" ]
}
local_fs_dump_target()
{
local _target
_target=$(egrep "^ext[234]|^xfs|^btrfs|^minix" /etc/kdump.conf)
if [ $? -eq 0 ]; then
echo $_target|awk '{print $2}'
fi
}
path_to_be_relabeled() {
local _path _target _mnt="/" _rmnt
if is_dump_target_configured; then
_target=$(local_fs_dump_target)
if [[ -n "$_target" ]]; then
_mnt=$(findmnt -k -f -n -r -o TARGET $_target)
if [ -z "$_mnt" ]; then
return
fi
else
return
fi
fi
_path=$(get_save_path)
# if $_path is masked by other mount, we will not relabel it.
_rmnt=$(df $_mnt/$_path 2>/dev/null | tail -1 | awk '{ print $NF }')
if [ "$_rmnt" == "$_mnt" ]; then
echo $_mnt/$_path
fi
}
selinux_relabel()
{
local _path _i _attr
_path=$(path_to_be_relabeled)
if [ -z "$_path" ] || ! [ -d "$_path" ] ; then
return
fi
for _i in $(find $_path); do
_attr=$(getfattr -m "security.selinux" $_i 2>/dev/null)
if [ -z "$_attr" ]; then
restorecon $_i;
fi
done
}
function start()
{
check_config
if [ $? -ne 0 ]; then
echo "Starting kdump: [FAILED]"
return 1
fi
if sestatus 2>/dev/null | grep -q "SELinux status.*enabled"; then
selinux_relabel
fi
save_raw
if [ $? -ne 0 ]; then
echo "Starting kdump: [FAILED]"
return 1
fi
status
rc=$?
if [ $rc == 2 ]; then
echo "Kdump is not supported on this kernel: [WARNING]"
return 1;
else
if [ $rc == 0 ]; then
echo "Kdump already running: [WARNING]"
return 0
fi
fi
if check_ssh_config; then
if ! check_ssh_target; then
echo "Starting kdump: [FAILED]"
return 1
fi
fi
check_rebuild
if [ $? != 0 ]; then
echo "Starting kdump: [FAILED]"
return 1
fi
load_kdump
if [ $? != 0 ]; then
echo "Starting kdump: [FAILED]"
return 1
fi
echo "Starting kdump: [OK]"
}
function stop()
{
$KEXEC -p -u 2>/dev/null
if [ $? == 0 ]; then
echo "kexec: unloaded kdump kernel"
echo "Stopping kdump: [OK]"
return 0
else
echo "kexec: failed to unloaded kdump kernel"
echo "Stopping kdump: [FAILED]"
return 1
fi
}
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
echo "Error: No kdump config file found!" >&2
exit 1
fi
# Other kdumpctl instances will block in queue, until this one exits
single_instance_lock
case "$1" in
start)
if [ -s /proc/vmcore ]; then
save_core
reboot
else
start
fi
;;
stop)
stop
;;
status)
EXIT_CODE=0
status
case "$?" in
0)
echo "Kdump is operational"
EXIT_CODE=0
;;
1)
echo "Kdump is not operational"
EXIT_CODE=3
;;
2)
echo "Kdump is unsupported on this kernel"
EXIT_CODE=3
;;
esac
exit $EXIT_CODE
;;
restart)
stop
start
;;
condrestart)
;;
propagate)
propagate_ssh_key
;;
*)
echo $"Usage: $0 {start|stop|status|restart|propagate}"
exit 1
esac
exit $?