kexec-tools/kdumpctl

732 lines
16 KiB
Plaintext
Raw Normal View History

2011-07-06 19:25:34 +00:00
#!/bin/bash
#
# kdumpctl - control script for the kdump service.
#
# The script uses bash-only constructs (local, [[ ]], ${var/pat/rep},
# $"..."), so it asks for bash explicitly instead of a generic /bin/sh.

# Built-in defaults. /etc/sysconfig/kdump, sourced below, may override them.
KEXEC=/sbin/kexec
KDUMP_KERNELVER=""
KDUMP_COMMANDLINE=""
KEXEC_ARGS=""
KDUMP_CONFIG_FILE="/etc/kdump.conf"
MKDUMPRD="/sbin/mkdumprd -f"
SAVE_PATH=/var/crash
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DUMP_TARGET=""

# Shared helper library. NOTE(review): presumably the source of helpers this
# file calls but does not define (strip_comments, get_option_value,
# is_pcs_fence_kdump, ...) - confirm against kdump-lib.sh.
. /lib/kdump/kdump-lib.sh

# Arguments that are always passed to $KEXEC when the kdump kernel is loaded.
standard_kexec_args="-p"

# Site-specific overrides of the defaults above.
if [ -f /etc/sysconfig/kdump ]; then
    . /etc/sysconfig/kdump
fi
# Serialize kdumpctl invocations.
# Opens /var/lock/kdump on file descriptor 9 and takes an flock on it.
# A first non-blocking attempt is made; if the lock is busy, a notice is
# printed and the lock is retried with a 5 second timeout until it is
# obtained. The descriptor is left open for the rest of the script.
single_instance_lock()
{
    local wait_secs=5

    exec 9>/var/lock/kdump

    if flock -n 9; then
        return 0
    fi

    while :; do
        echo "Another app is currently holding the kdump lock; waiting for it to exit..."
        flock -w $wait_secs 9 && break
    done
}
# remove_cmdline_param <kernel cmdline> <param1> [<param2>] ... [<paramN>]
# Remove a list of kernel parameters from a given kernel cmdline and print
# the result on stdout.
# For each "arg" in the list, both "arg" and "arg=xxx" are removed if present.
# Runs of whitespace in the result are collapsed to a single blank and
# leading/trailing blanks are dropped.
# Note: each parameter name is interpolated into a sed expression, so it
# must not contain regex metacharacters or '/'.
remove_cmdline_param()
{
    local cmdline=$1
    local arg
    shift

    # Normalize whitespace first so that "[^ ]*" below stops at the end of a
    # value even when parameters were separated by tabs.
    cmdline=$(printf '%s\n' "$cmdline" | sed -e 's/\s\+/ /g')

    for arg in "$@"; do
        cmdline=$(printf '%s\n' "$cmdline" | \
            sed -e "s/\b$arg=[^ ]*\b//g" \
                -e "s/\b$arg\b//g" \
                -e "s/\s\+/ /g")
    done

    # Trim the single blank that may remain at either end. The string is
    # printed quoted so that characters such as '*' are never glob-expanded.
    cmdline=${cmdline# }
    cmdline=${cmdline% }
    printf '%s\n' "$cmdline"
}
kdumpctl: Pass disable_cpu_apicid to kexec of capture kernel == Version 2 == Addresses Vivek's review comments: 1. Don't force numeric in awk script snippet. 2. Command line processing is moved from load_kernel to new function "prepare_cmdline." This new function is responsible for setting up the command line passed to KEXEC. 3. New function "append_cmdline" is added to append {argument,value} pair to command line if argument is not already present. == Version 1 == A recent patch (https://lkml.org/lkml/2014/1/15/42) enables multiple processors in the crash kernel. To do this safely the crash kernel needs to know which CPU was the 1st kernel BSP (bootstrap processor) so that the crash kernel will NOT send the BSP an INIT. If the crash kernel sends an INIT to the 1st kernel BSP, some systems may reset or hang. The EFI spec doesn't require that any particular processor is chosen as the BSP and the CPU (and its apic id) can change from one boot to the next. Hence automating the selection of CPU to disable if the system would panic is desired. This patch updates the kdumpctl script to get the "initial apicid" of CPU 0 in the first kernel and will pass this as the "disable_cpu_apicid=" argument to kexec if it wasn't explicitly set in /etc/sysconfig/kdump KDUMP_COMMANDLINE_APPEND. CPU 0 is chosen as it is the processor that executes the OS initialization code and hence was the BSP as per x86 SDM (Vol 3a Section 8.4.) See associated Red Hat Bugzilla(s) for additional background material: https://bugzilla.redhat.com/show_bug.cgi?id=1059031 https://bugzilla.redhat.com/show_bug.cgi?id=980621 Signed-off-by: Jerry Hoemann <jerry.hoemann@hp.com> Acked-by: Vivek Goyal <vgoyal@redhat.com>
2014-02-21 01:09:30 +00:00
#
# This function prints the "initial apicid" of the
# boot cpu (cpu 0) if present.
#
# $1 (optional): cpuinfo-formatted file to parse; defaults to /proc/cpuinfo.
# Prints nothing when processor 0 or its "initial apicid" line is absent.
#
get_bootcpu_initial_apicid()
{
    # CPU tracks the processor stanza currently being read; it is compared
    # as a string so that the value is never forced to numeric.
    awk '
        BEGIN { CPU = "-1"; }
        $1=="processor" && $2==":" { CPU = $NF; }
        CPU=="0" && /initial apicid/ { print $NF; exit; }
    ' "${1:-/proc/cpuinfo}"
}
#
# This function appends argument "$2=$3" to string ($1) if not already present.
#
# $1: kernel command line
# $2: parameter name
# $3: parameter value
#
# The parameter counts as present only when it appears as a whole word,
# either as "$2" or as "$2=<value>"; a name that merely contains $2 as a
# substring (e.g. "maxcpus" for "cpus") does not suppress the append.
# The resulting command line is printed on stdout with whitespace runs
# collapsed to single blanks.
#
append_cmdline()
{
    local cmdline=$1
    local -a words

    # Pad with blanks so the first and last parameters match the same
    # patterns as the ones in the middle. "$2" is quoted inside the pattern
    # so it is compared literally, not as a glob.
    case " ${cmdline} " in
        *[[:space:]]"$2"=* | *[[:space:]]"$2"[[:space:]]*)
            ;;
        *)
            cmdline="${cmdline} ${2}=${3}"
            ;;
    esac

    # Split into words with read (no pathname expansion, unlike an unquoted
    # echo) and re-join them with single blanks.
    read -r -a words <<< "${cmdline//$'\n'/ }"
    printf '%s\n' "${words[*]}"
}
# This function performs a series of edits on the command line and prints
# the result on stdout:
#   1. start from $KDUMP_COMMANDLINE, or /proc/cmdline when it is empty;
#   2. drop the crashkernel, hugepages and hugepagesz parameters;
#   3. append $KDUMP_COMMANDLINE_APPEND;
#   4. add disable_cpu_apicid=<initial apicid of cpu 0> when that id can be
#      determined and the parameter is not already present.
prepare_cmdline()
{
    local cmdline id
    local -a words

    if [ -z "$KDUMP_COMMANDLINE" ]; then
        cmdline=$(cat /proc/cmdline)
    else
        cmdline=${KDUMP_COMMANDLINE}
    fi

    cmdline=$(remove_cmdline_param "$cmdline" crashkernel hugepages hugepagesz)
    cmdline="${cmdline} ${KDUMP_COMMANDLINE_APPEND}"

    id=$(get_bootcpu_initial_apicid)
    if [ -n "${id}" ]; then
        cmdline=$(append_cmdline "${cmdline}" disable_cpu_apicid "${id}")
    fi

    # Collapse whitespace without exposing the string to pathname expansion
    # (an unquoted echo would expand characters such as '*').
    read -r -a words <<< "${cmdline//$'\n'/ }"
    printf '%s\n' "${words[*]}"
}
# Copy /proc/vmcore into a fresh, timestamped directory under /var/crash.
# The dump is first written as "vmcore-incomplete" and renamed to "vmcore"
# only after the copy succeeded, so a partial copy is never mistaken for a
# complete dump. The directory name is left in the global $coredir.
save_core()
{
    coredir="/var/crash/$(date +"%Y-%m-%d-%H:%M")"

    mkdir -p "$coredir"
    if cp --sparse=always /proc/vmcore "$coredir/vmcore-incomplete"; then
        mv "$coredir/vmcore-incomplete" "$coredir/vmcore"
        echo "saved a vmcore to $coredir"
    else
        echo "failed to save a vmcore to $coredir" >&2
    fi

    # pass the dmesg to Abrt tool if exists, in order
    # to collect the kernel oops message.
    # https://fedorahosted.org/abrt/
    if [ -x /usr/bin/dumpoops ]; then
        makedumpfile --dump-dmesg "$coredir/vmcore" "$coredir/dmesg" >/dev/null 2>&1
        if dumpoops -d "$coredir/dmesg" >/dev/null 2>&1; then
            echo "kernel oops has been collected by abrt tool"
        fi
    fi
}
# Run $MKDUMPRD for $kdump_initrd / $kdump_kver (both set by check_rebuild).
# Returns 1 and reports on stderr when the command fails, 0 otherwise.
rebuild_initrd()
{
    if ! $MKDUMPRD $kdump_initrd $kdump_kver; then
        echo "mkdumprd: failed to make kdump initrd" >&2
        return 1
    fi
}
# Verify that every listed path is an existing regular file.
#$1: the files to be checked with IFS=' '
# Prints "Error: <file> not found." for the first missing file and returns 1;
# returns 0 when all files exist (or the list is empty).
check_exist()
{
    local file

    # $1 is deliberately left unquoted: it is a blank-separated list.
    for file in $1; do
        if [ ! -f "$file" ]; then
            printf 'Error: %s not found.\n' "$file"
            return 1
        fi
    done
}
# Verify that every listed path is executable.
#$1: the files to be checked with IFS=' '
# Prints "Error: <file> is not executable." for the first offending file and
# returns 1; returns 0 when all files pass (or the list is empty).
check_executable()
{
    local file

    # $1 is deliberately left unquoted: it is a blank-separated list.
    for file in $1; do
        if [ ! -x "$file" ]; then
            printf 'Error: %s is not executable.\n' "$file"
            return 1
        fi
    done
}
# Validate $KDUMP_CONFIG_FILE.
# Fails (returns 1, with a message on stdout) when
#   - more than one dump target is specified,
#   - a known option has no value,
#   - a deprecated or unknown option is used,
#   - check_fence_kdump_config rejects the fence_kdump settings.
# Returns 0 otherwise.
check_config()
{
    local nr config_opt config_val

    nr=$(awk 'BEGIN{cnt=0} /^raw|^ssh[[:blank:]]|^nfs|^ext[234]|^xfs|^btrfs|^minix/{cnt++} END{print cnt}' "$KDUMP_CONFIG_FILE")
    if [ "${nr:-0}" -gt 1 ]; then
        echo "More than one dump targets specified."
        return 1
    fi

    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while read -r config_opt config_val || [ -n "$config_opt" ]; do
        # remove inline comments after the end of a directive.
        config_val=$(strip_comments $config_val)
        case "$config_opt" in
        \#* | "")
            ;;
        raw|ext2|ext3|ext4|minix|btrfs|xfs|nfs|ssh|sshkey|path|core_collector|kdump_post|kdump_pre|extra_bins|extra_modules|default|force_rebuild|dracut_args|fence_kdump_args|fence_kdump_nodes)
            if [ -z "$config_val" ]; then
                echo "Invalid kdump config value for option $config_opt."
                return 1
            fi
            ;;
        net|options|link_delay|disk_timeout|debug_mem_level|blacklist)
            echo "Deprecated kdump config option: $config_opt. Refer to kdump.conf manpage for alternatives."
            return 1
            ;;
        *)
            echo "Invalid kdump config option $config_opt"
            return 1
            ;;
        esac
    done < "$KDUMP_CONFIG_FILE"

    check_fence_kdump_config || return 1

    return 0
}
# get_pcs_cluster_modified_files <image timestamp>
# Print the list of fence_kdump related items that changed after the given
# timestamp (seconds since the epoch) in a Pacemaker cluster:
#   - "cluster-cib" when the cib-last-written attribute of the cluster CIB
#     is newer than the timestamp,
#   - $FENCE_KDUMP_CONFIG_FILE when that file is newer than the timestamp.
# Returns 1 without output when generic fence_kdump is in use or when this
# is not a pcs fence_kdump setup.
get_pcs_cluster_modified_files()
{
    local image_time=$1
    local last_written time_stamp
    local modified_files=""

    is_generic_fence_kdump && return 1
    is_pcs_fence_kdump || return 1

    last_written=$(pcs cluster cib | xmllint --xpath 'string(/cib/@cib-last-written)' -)
    # An empty attribute must not reach "date --date": an empty date string
    # would be converted instead of rejected.
    if [ -n "$last_written" ]; then
        time_stamp=$(date +%s --date "$last_written")
        if [ -n "$time_stamp" ] && [ "$time_stamp" -gt "$image_time" ]; then
            modified_files="cluster-cib"
        fi
    fi

    # The variable is checked for emptiness as well: "[ -f ]" with no operand
    # would evaluate to true.
    if [ -n "$FENCE_KDUMP_CONFIG_FILE" ] && [ -f "$FENCE_KDUMP_CONFIG_FILE" ]; then
        time_stamp=$(stat -c "%Y" "$FENCE_KDUMP_CONFIG_FILE")
        if [ -n "$time_stamp" ] && [ "$time_stamp" -gt "$image_time" ]; then
            modified_files="${modified_files:+$modified_files }$FENCE_KDUMP_CONFIG_FILE"
        fi
    fi

    printf '%s\n' "$modified_files"
}
# Decide whether the kdump initramfs has to be (re)built and rebuild it if so.
#
# Sets the globals kdump_kver, kdump_kernel and kdump_initrd, which are used
# by rebuild_initrd and load_kdump. A rebuild is triggered when
#   - no initramfs exists yet,
#   - force_rebuild is 1 or extra_modules is configured in the config file,
#   - the config file, the kernel image, any kdump_pre/kdump_post/extra_bins
#     file or the Pacemaker fence_kdump configuration is newer than the image.
# Returns 0 when nothing has to be done or the rebuild succeeded, non-zero on
# a configuration error, a missing/non-executable file or a failed rebuild.
check_rebuild()
{
    local extra_modules modified_files=""
    local _force_rebuild force_rebuild="0"
    local files file time_stamp

    if [ -z "$KDUMP_KERNELVER" ]; then
        kdump_kver=$(uname -r)
    else
        kdump_kver=$KDUMP_KERNELVER
    fi

    kdump_kernel="${KDUMP_BOOTDIR}/${KDUMP_IMG}-${kdump_kver}${KDUMP_IMG_EXT}"
    kdump_initrd="${KDUMP_BOOTDIR}/initramfs-${kdump_kver}kdump.img"

    if _force_rebuild=$(grep ^force_rebuild "$KDUMP_CONFIG_FILE" 2>/dev/null); then
        force_rebuild=$(echo $_force_rebuild | cut -d' ' -f2)
        if [ "$force_rebuild" != "0" ] && [ "$force_rebuild" != "1" ]; then
            echo "Error: force_rebuild value is invalid"
            return 1
        fi
    fi

    #will rebuild every time if extra_modules are specified
    extra_modules=$(grep ^extra_modules "$KDUMP_CONFIG_FILE")
    [ -n "$extra_modules" ] && force_rebuild="1"

    #check to see if dependent files has been modified
    #since last build of the image file
    if [ -f "$kdump_initrd" ]; then
        image_time=$(stat -c "%Y" "$kdump_initrd" 2>/dev/null) || image_time=0
    else
        image_time=0
    fi

    #also rebuild when Pacemaker cluster conf is changed and fence kdump is enabled.
    modified_files=$(get_pcs_cluster_modified_files "$image_time")

    # The field delimiter is spelled -d' ': a backslash-escaped blank is easy
    # to lose and then swallows the following option.
    EXTRA_BINS=$(grep ^kdump_post "$KDUMP_CONFIG_FILE" | cut -d' ' -f2)
    CHECK_FILES=$(grep ^kdump_pre "$KDUMP_CONFIG_FILE" | cut -d' ' -f2)
    EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
    CHECK_FILES=$(grep ^extra_bins "$KDUMP_CONFIG_FILE" | cut -d' ' -f2-)
    EXTRA_BINS="$EXTRA_BINS $CHECK_FILES"
    files="$KDUMP_CONFIG_FILE $kdump_kernel $EXTRA_BINS"

    if ! { check_exist "$files" && check_executable "$EXTRA_BINS"; }; then
        return 1
    fi

    # $files is a blank-separated list, hence the unquoted expansion.
    for file in $files; do
        time_stamp=$(stat -c "%Y" "$file")
        if [ "$time_stamp" -gt "$image_time" ]; then
            modified_files="$modified_files $file"
        fi
    done

    if [ "$image_time" -eq 0 ]; then
        echo -n "No kdump initial ramdisk found."; echo
    elif [ "$force_rebuild" != "0" ]; then
        echo -n "Force rebuild $kdump_initrd"; echo
    elif [ -n "$modified_files" ]; then
        echo "Detected change(s) in the following file(s):"
        echo -n " "; echo "$modified_files" | sed 's/\s/\n /g'
    else
        return 0
    fi

    echo "Rebuilding $kdump_initrd"
    rebuild_initrd
    return $?
}
# This function checks iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we don't.
#
# $1 (optional): iomem-formatted file to inspect; defaults to /proc/iomem.
# Only the end address of the last line is examined. The comparison is done
# on the number of significant hex digits (more than 8 means the address is
# above 0xffffffff), so it needs neither gawk's strtonum nor 64-bit
# arithmetic.
need_64bit_headers()
{
    local iomem=${1:-/proc/iomem}
    local last_range end_addr

    last_range=$(tail -n 1 "$iomem" | awk '{ print $1 }')
    end_addr=${last_range#*-}
    # strip leading zeros
    end_addr=${end_addr#"${end_addr%%[!0]*}"}

    if [ "${#end_addr}" -gt 8 ]; then
        return 1
    fi
    return 0
}
# Load the kdump kernel specified in /etc/sysconfig/kdump
# If none is specified, try to load a kdump kernel with the same version
# as the currently running kernel.
#
# Uses $kdump_kernel and $kdump_initrd as set by check_rebuild, and replaces
# $KDUMP_COMMANDLINE with the output of prepare_cmdline.
# Returns 1 when no crash memory is reserved or $KEXEC fails, 0 on success.
load_kdump()
{
    MEM_RESERVED=$(cat /sys/kernel/kexec_crash_size)
    # An unreadable or empty value is treated like 0 instead of letting the
    # numeric test fail and the load proceed.
    if [ "${MEM_RESERVED:-0}" -eq 0 ]; then
        echo "No memory reserved for crash kernel." >&2
        return 1
    fi

    ARCH=$(uname -m)
    if [ "$ARCH" = "i686" ] || [ "$ARCH" = "i386" ]; then
        need_64bit_headers
        if [ $? -eq 1 ]; then
            # More than 4GB: elf64 headers are needed unless the admin
            # explicitly asked for elf32.
            case "$KEXEC_ARGS" in
            *elf32-core-headers*)
                echo -n "Warning: elf32-core-headers overrides correct elf64 setting"
                echo
                ;;
            *)
                KEXEC_ARGS="$KEXEC_ARGS --elf64-core-headers"
                ;;
            esac
        else
            case "$KEXEC_ARGS" in
            *elf64-core-headers*)
                ;;
            *)
                KEXEC_ARGS="$KEXEC_ARGS --elf32-core-headers"
                ;;
            esac
        fi
    fi

    KDUMP_COMMANDLINE=$(prepare_cmdline)

    # $KEXEC_ARGS and $standard_kexec_args are option lists and are split on
    # purpose.
    if $KEXEC $KEXEC_ARGS $standard_kexec_args \
        --command-line="$KDUMP_COMMANDLINE" \
        --initrd="$kdump_initrd" "$kdump_kernel" 2>/dev/null; then
        echo "kexec: loaded kdump kernel"
        return 0
    else
        echo "kexec: failed to load kdump kernel" >&2
        return 1
    fi
}
# Read the ssh related settings from $KDUMP_CONFIG_FILE into the globals
#   SSH_KEY_LOCATION (sshkey, only when the file exists; canonicalized),
#   SAVE_PATH        (path),
#   DUMP_TARGET      (ssh).
# Returns 0 when DUMP_TARGET contains an "@", i.e. an ssh dump target is
# configured, and 1 otherwise.
check_ssh_config()
{
    local config_opt config_val

    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
    while read -r config_opt config_val || [ -n "$config_opt" ]; do
        # remove inline comments after the end of a directive.
        config_val=$(strip_comments $config_val)
        case "$config_opt" in
        sshkey)
            if [ -f "$config_val" ]; then
                # canonicalize the path
                SSH_KEY_LOCATION=$(/usr/bin/readlink -m "$config_val")
            else
                echo "WARNING: '$config_val' doesn't exist, using default value '$SSH_KEY_LOCATION'"
            fi
            ;;
        path)
            SAVE_PATH=$config_val
            ;;
        ssh)
            DUMP_TARGET=$config_val
            ;;
        *)
            ;;
        esac
    done < "$KDUMP_CONFIG_FILE"

    #make sure they've configured kdump.conf for ssh dumps
    case "$DUMP_TARGET" in
    *@*)
        return 0
        ;;
    *)
        return 1
        ;;
    esac
}
# Check that $SAVE_PATH can be created on $DUMP_TARGET over ssh, using the
# key $SSH_KEY_LOCATION in batch (non-interactive) mode.
# Returns 0 on success; otherwise prints a hint on stderr and returns 1.
check_ssh_target()
{
    if ssh -q -i $SSH_KEY_LOCATION -o BatchMode=yes $DUMP_TARGET mkdir -p $SAVE_PATH; then
        return 0
    fi

    echo "Could not create $DUMP_TARGET:$SAVE_PATH, you probably need to run \"kdumpctl propagate\"" >&2
    return 1
}
# Install the kdump ssh public key on the configured ssh dump target.
#
# Reads the ssh settings via check_ssh_config, generates the key pair at
# $SSH_KEY_LOCATION when it does not exist yet, and copies it to the target
# with ssh-copy-id. Sets the globals SSH_USER and SSH_SERVER.
# Returns 0 on success; exits the script with status 1 when no ssh target is
# configured, the key cannot be generated or the transfer fails.
propagate_ssh_key()
{
    local KEYFILE
    local errmsg="Failed to propagate ssh key"

    if ! check_ssh_config; then
        echo "No ssh config specified in $KDUMP_CONFIG_FILE. Can't propagate" >&2
        exit 1
    fi

    KEYFILE=$SSH_KEY_LOCATION

    #Check to see if we already created key, if not, create it.
    if [ -f "$KEYFILE" ]; then
        echo "Using existing keys..."
    else
        echo -n "Generating new ssh keys... "
        # Only the normal chatter is discarded; diagnostics stay on stderr.
        if ! /usr/bin/ssh-keygen -t rsa -f "$KEYFILE" -N "" > /dev/null; then
            echo "$errmsg, could not generate $KEYFILE" >&2
            exit 1
        fi
        echo "done."
    fi

    #now find the target ssh user and server to contact.
    # user: text before the first '@'; server: text after the last '@'.
    SSH_USER=${DUMP_TARGET%%@*}
    SSH_SERVER=${DUMP_TARGET##*@}

    #now send the found key to the found server
    if ssh-copy-id -i "$KEYFILE" "$SSH_USER@$SSH_SERVER"; then
        echo "$KEYFILE has been added to ~$SSH_USER/.ssh/authorized_keys on $SSH_SERVER"
        return 0
    else
        echo "$errmsg, $KEYFILE failed in transfer to $SSH_SERVER" >&2
        exit 1
    fi
}
# Report whether a kdump kernel is currently loaded.
# Returns 0 when /sys/kernel/kexec_crash_loaded contains "1", and 1 in every
# other case (including when the file cannot be read).
check_current_kdump_status()
{
    local loaded

    loaded=$(cat /sys/kernel/kexec_crash_loaded)
    if [ "$loaded" = "1" ]; then
        return 0
    else
        return 1
    fi
}
# Recover a dump that was written to the raw partition configured with the
# "raw" option in $KDUMP_CONFIG_FILE.
# The dump is reassembled with "makedumpfile -R" into a timestamped
# directory below the configured "path" (default /var/crash); afterwards the
# first block of the partition is zeroed. When no dump is found the freshly
# created directory is removed again.
# Returns 0 when no raw target is configured or after the attempt, 1 when
# the raw target is not a block device or the directory cannot be created.
# The directory name is left in the global $coredir.
save_raw()
{
    local kdump_dir
    local raw_target

    raw_target=$(awk '$1 ~ /^raw$/ { print $2; }' $KDUMP_CONFIG_FILE)
    if [ -z "$raw_target" ]; then
        return 0
    fi
    if [ ! -b "$raw_target" ]; then
        echo "raw partition $raw_target not found"
        return 1
    fi

    kdump_dir=$(grep ^path $KDUMP_CONFIG_FILE | cut -d' ' -f2-)
    coredir="${kdump_dir:-/var/crash}/$(date +"%Y-%m-%d-%H:%M")"

    mkdir -p "$coredir"
    if [ ! -d "$coredir" ]; then
        echo "failed to create $coredir"
        return 1
    fi

    if ! makedumpfile -R $coredir/vmcore <$raw_target >/dev/null 2>&1; then
        # no dump on the partition: drop the empty directory again
        rm -rf "$coredir"
        return 0
    fi

    # dump found
    echo "Dump saved to $coredir/vmcore"
    # wipe makedumpfile header
    dd if=/dev/zero of=$raw_target bs=1b count=1 2>/dev/null
    return 0
}
# Print the dump directory configured with the "path" option, or /var/crash
# when the option is absent.
# The file read is $KDUMP_CONFIG_FILE (falling back to /etc/kdump.conf), the
# same file the rest of this script validates and parses.
get_save_path()
{
    local _save_path

    # Require a blank after "path" so that other option names which merely
    # start with "path" are not picked up.
    _save_path=$(awk '/^path[[:blank:]]/ { print $2 }' "${KDUMP_CONFIG_FILE:-/etc/kdump.conf}")
    if [ -z "$_save_path" ]; then
        _save_path="/var/crash"
    fi

    echo $_save_path
}
# Succeed when the config file names an explicit dump target
# (ext2/3/4, xfs, btrfs, minix, raw, ssh or nfs).
# The option name must be followed by a blank, so "sshkey" is not mistaken
# for an "ssh" target. The file read is $KDUMP_CONFIG_FILE (falling back to
# /etc/kdump.conf).
is_dump_target_configured()
{
    grep -E -q '^(ext[234]|xfs|btrfs|minix|raw|ssh|nfs)[[:blank:]]' \
        "${KDUMP_CONFIG_FILE:-/etc/kdump.conf}"
}
# Print the device/identifier (second field) of the local filesystem dump
# target (ext2/3/4, xfs, btrfs or minix) from the config file; print nothing
# when no such target is configured.
# The file read is $KDUMP_CONFIG_FILE (falling back to /etc/kdump.conf).
local_fs_dump_target()
{
    grep -E '^(ext[234]|xfs|btrfs|minix)[[:blank:]]' \
        "${KDUMP_CONFIG_FILE:-/etc/kdump.conf}" \
        | awk 'NR == 1 { print $2 }'
}
path_to_be_relabeled()
{
local _path _target _mnt="/" _rmnt
if is_dump_target_configured; then
_target=$(local_fs_dump_target)
if [[ -n "$_target" ]]; then
_mnt=$(findmnt -k -f -n -r -o TARGET $_target)
if [ -z "$_mnt" ]; then
return
fi
else
return
fi
fi
_path=$(get_save_path)
# if $_path is masked by other mount, we will not relabel it.
_rmnt=$(df $_mnt/$_path 2>/dev/null | tail -1 | awk '{ print $NF }')
if [ "$_rmnt" == "$_mnt" ]; then
echo $_mnt/$_path
fi
}
# Restore the SELinux context of every file below the dump directory that
# has no security.selinux attribute yet.
# Does nothing when path_to_be_relabeled yields no path or the path is not
# a directory.
selinux_relabel()
{
    local _path _i _attr

    _path=$(path_to_be_relabeled)
    if [ -z "$_path" ] || ! [ -d "$_path" ]; then
        return
    fi

    # NUL-delimited traversal keeps file names containing blanks or
    # newlines intact. The loop runs in the pipeline's subshell, which is
    # fine because nothing set inside it is needed afterwards.
    find "$_path" -print0 | while IFS= read -r -d '' _i; do
        _attr=$(getfattr -m "security.selinux" "$_i" 2>/dev/null)
        if [ -z "$_attr" ]; then
            restorecon "$_i"
        fi
    done
}
# Check if secure boot is being enforced.
#
# Per Peter Jones, we need to check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# $1 (optional): directory holding the efivars files; by default efivars is
# assumed to be mounted at /sys/firmware/efi/efivars.
# Returns 0 when secure boot is enforced, 1 otherwise.
is_secure_boot_enforced()
{
    local efivars_dir=${1:-/sys/firmware/efi/efivars}
    local secure_boot_file setup_mode_file
    local secure_boot_byte setup_mode_byte

    # The name patterns are quoted so that the shell never expands them
    # against files in the current directory.
    secure_boot_file=$(find "$efivars_dir" -name 'SecureBoot-*' 2>/dev/null)
    setup_mode_file=$(find "$efivars_dir" -name 'SetupMode-*' 2>/dev/null)

    if [ -f "$secure_boot_file" ] && [ -f "$setup_mode_file" ]; then
        # Fifth byte (offset 4) printed as a decimal number.
        secure_boot_byte=$(od -An -tu1 -j4 -N1 "$secure_boot_file" | tr -d '[:space:]')
        setup_mode_byte=$(od -An -tu1 -j4 -N1 "$setup_mode_file" | tr -d '[:space:]')

        if [ "$secure_boot_byte" = "1" ] && [ "$setup_mode_byte" = "0" ]; then
            return 0
        fi
    fi

    return 1
}
# Check the preconditions for running kdump on this system.
# Returns 1 with an explanation on stdout when secure boot is enforced or
# when /sys/kernel/kexec_crash_loaded does not exist; returns 0 otherwise.
check_kdump_feasibility()
{
    if is_secure_boot_enforced; then
        echo "Secure Boot is Enabled. Kdump service can't be started. Disable Secure Boot and retry"
        return 1
    fi

    [ -e /sys/kernel/kexec_crash_loaded ] && return 0

    echo "Kdump is not supported on this kernel"
    return 1
}
# Validate the fence_kdump_nodes option: the list must not contain the name
# this host reports via hostname(1).
# Returns 1 with a message on stdout when it does, 0 otherwise.
check_fence_kdump_config()
{
    local hostname nodes node

    hostname=$(hostname)
    nodes=$(get_option_value "fence_kdump_nodes")

    # $nodes is a blank-separated list, hence the unquoted expansion.
    for node in $nodes; do
        if [ "$node" = "$hostname" ]; then
            echo "Option fence_kdump_nodes cannot contain $hostname"
            return 1
        fi
    done

    return 0
}
# Start the kdump service: validate the configuration, relabel the dump
# directory when SELinux is enabled, recover a pending raw dump, check that
# kdump is possible, verify the ssh target (if one is configured), rebuild
# the initramfs when needed and finally load the kdump kernel.
# Prints "Starting kdump: [OK]" and returns 0 on success, prints
# "Starting kdump: [FAILED]" and returns 1 as soon as a step fails, and
# returns 0 with a warning when a kdump kernel is already loaded.
start()
{
    if ! check_config; then
        echo "Starting kdump: [FAILED]"
        return 1
    fi

    if sestatus 2>/dev/null | grep -q "SELinux status.*enabled"; then
        selinux_relabel
    fi

    if ! save_raw; then
        echo "Starting kdump: [FAILED]"
        return 1
    fi

    if ! check_kdump_feasibility; then
        echo "Starting kdump: [FAILED]"
        return 1
    fi

    if check_current_kdump_status; then
        echo "Kdump already running: [WARNING]"
        return 0
    fi

    # The ssh target is only probed when an ssh dump is configured.
    if check_ssh_config; then
        if ! check_ssh_target; then
            echo "Starting kdump: [FAILED]"
            return 1
        fi
    fi

    if ! check_rebuild; then
        echo "Starting kdump: [FAILED]"
        return 1
    fi

    if ! load_kdump; then
        echo "Starting kdump: [FAILED]"
        return 1
    fi

    echo "Starting kdump: [OK]"
}
# Stop the kdump service by running "$KEXEC -p -u".
# Prints the outcome on stdout and returns 0 on success, 1 on failure.
stop()
{
    if $KEXEC -p -u 2>/dev/null; then
        echo "kexec: unloaded kdump kernel"
        echo "Stopping kdump: [OK]"
        return 0
    else
        echo "kexec: failed to unload kdump kernel"
        echo "Stopping kdump: [FAILED]"
        return 1
    fi
}
# Nothing can be done without the kdump configuration file: bail out early.
[ -f "$KDUMP_CONFIG_FILE" ] || {
    echo "Error: No kdump config file found!" >&2
    exit 1
}
2013-11-25 16:23:11 +00:00
# Dispatch the kdumpctl sub-command given in $1.
#   start       save /proc/vmcore and reboot when a vmcore is present
#               (non-empty /proc/vmcore), otherwise start the service
#   stop        unload the kdump kernel
#   status      report whether kdump is operational; exits 0 if it is,
#               3 if it is not
#   restart     stop, then start
#   condrestart accepted, does nothing
#   propagate   install the ssh key on the dump target
# Anything else prints the usage message and exits with status 1.
main ()
{
    local EXIT_CODE

    case "$1" in
    start)
        if [ -s /proc/vmcore ]; then
            save_core
            reboot
        else
            start
        fi
        ;;
    stop)
        stop
        ;;
    status)
        EXIT_CODE=0
        check_current_kdump_status
        case "$?" in
        0)
            echo "Kdump is operational"
            EXIT_CODE=0
            ;;
        1)
            echo "Kdump is not operational"
            EXIT_CODE=3
            ;;
        esac
        exit $EXIT_CODE
        ;;
    restart)
        stop
        start
        ;;
    condrestart)
        ;;
    propagate)
        propagate_ssh_key
        ;;
    *)
        echo $"Usage: $0 {start|stop|status|restart|propagate}"
        exit 1
        ;;
    esac
}
# Other kdumpctl instances will block in queue, until this one exits
single_instance_lock

# To avoid fd 9 leaking, we invoke a subshell, close fd 9 and call main.
# So that fd isn't leaking when main is invoking a subshell.
(exec 9<&-; main "$1")

# Must directly follow the subshell so that $? is main's exit status.
exit $?