Improve debugging in the kdump kernel
Let's use the logger in the second kernel and collect the kernel ring buffer(dmesg) of the second kernel. Signed-off-by: Lianbo Jiang <lijiang@redhat.com> Acked-by: Kairui Song <kasong@redhat.com>
This commit is contained in:
parent
88a8b94de9
commit
d7054f4cd8
@ -5,7 +5,6 @@ if [ -f /etc/fadump.initramfs ] && [ ! -f /proc/device-tree/rtas/ibm,kernel-dump
|
||||
exit 0
|
||||
fi
|
||||
|
||||
exec &> /dev/console
|
||||
. /lib/dracut-lib.sh
|
||||
. /lib/kdump-lib-initramfs.sh
|
||||
|
||||
@ -22,7 +21,7 @@ do_dump()
|
||||
_ret=$?
|
||||
|
||||
if [ $_ret -ne 0 ]; then
|
||||
echo "kdump: saving vmcore failed"
|
||||
derror "saving vmcore failed"
|
||||
fi
|
||||
|
||||
return $_ret
|
||||
@ -36,7 +35,7 @@ do_kdump_pre()
|
||||
"$KDUMP_PRE"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
echo "kdump: $KDUMP_PRE exited with $_ret status"
|
||||
derror "$KDUMP_PRE exited with $_ret status"
|
||||
return $_ret
|
||||
fi
|
||||
fi
|
||||
@ -47,7 +46,7 @@ do_kdump_pre()
|
||||
"$file"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
echo "kdump: $file exited with $_ret status"
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
@ -63,7 +62,7 @@ do_kdump_post()
|
||||
"$file" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
echo "kdump: $file exited with $_ret status"
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
@ -72,7 +71,7 @@ do_kdump_post()
|
||||
"$KDUMP_POST" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
echo "kdump: $KDUMP_POST exited with $_ret status"
|
||||
derror "$KDUMP_POST exited with $_ret status"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
@ -88,7 +87,7 @@ dump_raw()
|
||||
|
||||
[ -b "$_raw" ] || return 1
|
||||
|
||||
echo "kdump: saving to raw disk $_raw"
|
||||
dinfo "saving to raw disk $_raw"
|
||||
|
||||
if ! $(echo -n $CORE_COLLECTOR|grep -q makedumpfile); then
|
||||
_src_size=`ls -l /proc/vmcore | cut -d' ' -f5`
|
||||
@ -96,21 +95,22 @@ dump_raw()
|
||||
monitor_dd_progress $_src_size_mb &
|
||||
fi
|
||||
|
||||
echo "kdump: saving vmcore"
|
||||
dinfo "saving vmcore"
|
||||
$CORE_COLLECTOR /proc/vmcore | dd of=$_raw bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1
|
||||
sync
|
||||
|
||||
echo "kdump: saving vmcore complete"
|
||||
dinfo "saving vmcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
dump_ssh()
|
||||
{
|
||||
local ret
|
||||
local _opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes"
|
||||
local _dir="$KDUMP_PATH/$HOST_IP-$DATEDIR"
|
||||
local _host=$2
|
||||
|
||||
echo "kdump: saving to $_host:$_dir"
|
||||
dinfo "saving to $_host:$_dir"
|
||||
|
||||
cat /var/lib/random-seed > /dev/urandom
|
||||
ssh -q $_opt $_host mkdir -p $_dir || return 1
|
||||
@ -118,17 +118,29 @@ dump_ssh()
|
||||
save_vmcore_dmesg_ssh ${DMESG_COLLECTOR} ${_dir} "${_opt}" $_host
|
||||
save_opalcore_ssh ${_dir} "${_opt}" $_host
|
||||
|
||||
echo "kdump: saving vmcore"
|
||||
dinfo "saving vmcore"
|
||||
|
||||
if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then
|
||||
scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete" || return 1
|
||||
scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete"
|
||||
ret=$?
|
||||
save_log
|
||||
scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/"
|
||||
if [ $ret -ne 0 ]; then
|
||||
return 1
|
||||
fi
|
||||
ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore" || return 1
|
||||
else
|
||||
$CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "dd bs=512 of=$_dir/vmcore-incomplete" || return 1
|
||||
$CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "dd bs=512 of=$_dir/vmcore-incomplete"
|
||||
ret=$?
|
||||
save_log
|
||||
scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/"
|
||||
if [ $ret -ne 0 ]; then
|
||||
return 1
|
||||
fi
|
||||
ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore.flat" || return 1
|
||||
fi
|
||||
|
||||
echo "kdump: saving vmcore complete"
|
||||
dinfo "saving vmcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
@ -137,6 +149,8 @@ save_opalcore_ssh() {
|
||||
local _opts="$2"
|
||||
local _location=$3
|
||||
|
||||
ddebug "_path=$_path _opts=$_opts _location=$_location"
|
||||
|
||||
if [ ! -f $OPALCORE ]; then
|
||||
# Check if we are on an old kernel that uses a different path
|
||||
if [ -f /sys/firmware/opal/core ]; then
|
||||
@ -146,15 +160,15 @@ save_opalcore_ssh() {
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "kdump: saving opalcore"
|
||||
dinfo "saving opalcore:$OPALCORE to $_location:$_path"
|
||||
scp $_opts $OPALCORE $_location:$_path/opalcore-incomplete
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: saving opalcore failed"
|
||||
derror "saving opalcore failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
ssh $_opts $_location mv $_path/opalcore-incomplete $_path/opalcore
|
||||
echo "kdump: saving opalcore complete"
|
||||
dinfo "saving opalcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
@ -164,15 +178,15 @@ save_vmcore_dmesg_ssh() {
|
||||
local _opts="$3"
|
||||
local _location=$4
|
||||
|
||||
echo "kdump: saving vmcore-dmesg.txt"
|
||||
dinfo "saving vmcore-dmesg.txt to $_location:$_path"
|
||||
$_dmesg_collector /proc/vmcore | ssh $_opts $_location "dd of=$_path/vmcore-dmesg-incomplete.txt"
|
||||
_exitcode=$?
|
||||
|
||||
if [ $_exitcode -eq 0 ]; then
|
||||
ssh -q $_opts $_location mv $_path/vmcore-dmesg-incomplete.txt $_path/vmcore-dmesg.txt
|
||||
echo "kdump: saving vmcore-dmesg.txt complete"
|
||||
dinfo "saving vmcore-dmesg.txt complete"
|
||||
else
|
||||
echo "kdump: saving vmcore-dmesg.txt failed"
|
||||
derror "saving vmcore-dmesg.txt failed"
|
||||
fi
|
||||
}
|
||||
|
||||
@ -182,12 +196,12 @@ get_host_ip()
|
||||
if is_nfs_dump_target || is_ssh_dump_target
|
||||
then
|
||||
kdumpnic=$(getarg kdumpnic=)
|
||||
[ -z "$kdumpnic" ] && echo "kdump: failed to get kdumpnic!" && return 1
|
||||
[ -z "$kdumpnic" ] && derror "failed to get kdumpnic!" && return 1
|
||||
_host=`ip addr show dev $kdumpnic|grep '[ ]*inet'`
|
||||
[ $? -ne 0 ] && echo "kdump: wrong kdumpnic: $kdumpnic" && return 1
|
||||
[ $? -ne 0 ] && derror "wrong kdumpnic: $kdumpnic" && return 1
|
||||
_host=`echo $_host | head -n 1 | cut -d' ' -f2`
|
||||
_host="${_host%%/*}"
|
||||
[ -z "$_host" ] && echo "kdump: wrong kdumpnic: $kdumpnic" && return 1
|
||||
[ -z "$_host" ] && derror "wrong kdumpnic: $kdumpnic" && return 1
|
||||
HOST_IP=$_host
|
||||
fi
|
||||
return 0
|
||||
@ -196,7 +210,7 @@ get_host_ip()
|
||||
read_kdump_conf()
|
||||
{
|
||||
if [ ! -f "$KDUMP_CONF" ]; then
|
||||
echo "kdump: $KDUMP_CONF not found"
|
||||
derror "$KDUMP_CONF not found"
|
||||
return
|
||||
fi
|
||||
|
||||
@ -240,7 +254,7 @@ fence_kdump_notify
|
||||
|
||||
get_host_ip
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: get_host_ip exited with non-zero status!"
|
||||
derror "get_host_ip exited with non-zero status!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@ -250,7 +264,7 @@ fi
|
||||
|
||||
do_kdump_pre
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: kdump_pre script exited with non-zero status!"
|
||||
derror "kdump_pre script exited with non-zero status!"
|
||||
do_final_action
|
||||
# During systemd service to reboot the machine, stop this shell script running
|
||||
exit 1
|
||||
@ -261,7 +275,7 @@ DUMP_RETVAL=$?
|
||||
|
||||
do_kdump_post $DUMP_RETVAL
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: kdump_post script exited with non-zero status!"
|
||||
derror "kdump_post script exited with non-zero status!"
|
||||
fi
|
||||
|
||||
if [ $DUMP_RETVAL -ne 0 ]; then
|
||||
|
@ -818,6 +818,7 @@ install() {
|
||||
kdump_install_random_seed
|
||||
fi
|
||||
dracut_install -o /etc/adjtime /etc/localtime
|
||||
inst_simple "/etc/sysconfig/kdump"
|
||||
inst "$moddir/monitor_dd_progress" "/kdumpscripts/monitor_dd_progress"
|
||||
chmod +x ${initdir}/kdumpscripts/monitor_dd_progress
|
||||
inst "/bin/dd" "/bin/dd"
|
||||
@ -830,8 +831,11 @@ install() {
|
||||
inst "/bin/sed" "/bin/sed"
|
||||
inst "/sbin/makedumpfile" "/sbin/makedumpfile"
|
||||
inst "/sbin/vmcore-dmesg" "/sbin/vmcore-dmesg"
|
||||
inst "/usr/bin/printf" "/sbin/printf"
|
||||
inst "/usr/bin/logger" "/sbin/logger"
|
||||
inst "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh"
|
||||
inst "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump-lib-initramfs.sh"
|
||||
inst "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh"
|
||||
inst "$moddir/kdump.sh" "/usr/bin/kdump.sh"
|
||||
inst "$moddir/kdump-capture.service" "$systemdsystemunitdir/kdump-capture.service"
|
||||
mkdir -p "$initdir/$systemdsystemunitdir/initrd.target.wants"
|
||||
|
@ -1,8 +1,10 @@
|
||||
# These variables and functions are useful in 2nd kernel
|
||||
|
||||
. /etc/sysconfig/kdump
|
||||
. /lib/kdump-lib.sh
|
||||
|
||||
KDUMP_PATH="/var/crash"
|
||||
KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log"
|
||||
CORE_COLLECTOR=""
|
||||
DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 1 -d 31"
|
||||
DMESG_COLLECTOR="/sbin/vmcore-dmesg"
|
||||
@ -20,6 +22,13 @@ KDUMP_POST=""
|
||||
NEWROOT="/sysroot"
|
||||
OPALCORE="/sys/firmware/opal/mpipl/core"
|
||||
|
||||
#initiate the kdump logger
|
||||
dlog_init
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "failed to initiate the kdump logger."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_kdump_confs()
|
||||
{
|
||||
local config_opt config_val
|
||||
@ -94,27 +103,40 @@ get_kdump_confs()
|
||||
fi
|
||||
}
|
||||
|
||||
# store the kexec kernel log to a file.
|
||||
save_log()
|
||||
{
|
||||
dmesg -T > $KDUMP_LOG_FILE
|
||||
|
||||
if command -v journalctl > /dev/null; then
|
||||
journalctl -ab >> $KDUMP_LOG_FILE
|
||||
fi
|
||||
}
|
||||
|
||||
# dump_fs <mount point>
|
||||
dump_fs()
|
||||
{
|
||||
local ret
|
||||
local _mp=$1
|
||||
local _dev=$(get_mount_info SOURCE target $_mp -f)
|
||||
local _op=$(get_mount_info OPTIONS target $_mp -f)
|
||||
|
||||
ddebug "_mp=$_mp _dev=$_dev _op=$_op"
|
||||
|
||||
# If dump path have a corresponding device entry but not mounted, mount it.
|
||||
if [ -n "$_dev" ]; then
|
||||
if ! is_mounted "$_mp"; then
|
||||
echo "kdump: dump target $_dev is not mounted, trying to mount..."
|
||||
dinfo "dump target $_dev is not mounted, trying to mount..."
|
||||
mkdir -p $_mp
|
||||
mount -o $_op $_dev $_mp
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: mounting failed (mount point: $_mp, option: $_op)"
|
||||
derror "mounting failed (mount point: $_mp, option: $_op)"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "kdump: failed to dump to \"$_mp\", it's not a mount point!"
|
||||
derror "failed to dump to \"$_mp\", it's not a mount point!"
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -123,11 +145,11 @@ dump_fs()
|
||||
|
||||
local _dump_path=$(echo "$_mp/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /)
|
||||
|
||||
echo "kdump: saving to $_dump_path"
|
||||
dinfo "saving to $_dump_path"
|
||||
|
||||
# Only remount to read-write mode if the dump target is mounted read-only.
|
||||
if [[ "$_op" = "ro"* ]]; then
|
||||
echo "kdump: Mounting Dump target $_dev in rw mode."
|
||||
dinfo "Mounting Dump target $_dev in rw mode."
|
||||
mount -o remount,rw $_dev $_mp || return 1
|
||||
fi
|
||||
|
||||
@ -136,12 +158,18 @@ dump_fs()
|
||||
save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_path"
|
||||
save_opalcore_fs "$_dump_path"
|
||||
|
||||
echo "kdump: saving vmcore"
|
||||
$CORE_COLLECTOR /proc/vmcore $_dump_path/vmcore-incomplete || return 1
|
||||
dinfo "saving vmcore"
|
||||
$CORE_COLLECTOR /proc/vmcore $_dump_path/vmcore-incomplete
|
||||
ret=$?
|
||||
save_log
|
||||
mv $KDUMP_LOG_FILE $_dump_path/
|
||||
if [ $ret -ne 0 ]; then
|
||||
return 1
|
||||
fi
|
||||
mv $_dump_path/vmcore-incomplete $_dump_path/vmcore
|
||||
sync
|
||||
|
||||
echo "kdump: saving vmcore complete"
|
||||
dinfo "saving vmcore complete"
|
||||
|
||||
# improper kernel cmdline can cause the failure of echo, we can ignore this kind of failure
|
||||
return 0
|
||||
@ -151,7 +179,7 @@ save_vmcore_dmesg_fs() {
|
||||
local _dmesg_collector=$1
|
||||
local _path=$2
|
||||
|
||||
echo "kdump: saving vmcore-dmesg.txt"
|
||||
dinfo "saving vmcore-dmesg.txt to ${_path}"
|
||||
$_dmesg_collector /proc/vmcore > ${_path}/vmcore-dmesg-incomplete.txt
|
||||
_exitcode=$?
|
||||
if [ $_exitcode -eq 0 ]; then
|
||||
@ -161,9 +189,9 @@ save_vmcore_dmesg_fs() {
|
||||
# saving vmcore failed and system rebooted without sync and there
|
||||
# was no vmcore-dmesg.txt available.
|
||||
sync
|
||||
echo "kdump: saving vmcore-dmesg.txt complete"
|
||||
dinfo "saving vmcore-dmesg.txt complete"
|
||||
else
|
||||
echo "kdump: saving vmcore-dmesg.txt failed"
|
||||
derror "saving vmcore-dmesg.txt failed"
|
||||
fi
|
||||
}
|
||||
|
||||
@ -179,43 +207,47 @@ save_opalcore_fs() {
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "kdump: saving opalcore"
|
||||
dinfo "saving opalcore:$OPALCORE to ${_path}/opalcore"
|
||||
cp $OPALCORE ${_path}/opalcore
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "kdump: saving opalcore failed"
|
||||
derror "saving opalcore failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
sync
|
||||
echo "kdump: saving opalcore complete"
|
||||
dinfo "saving opalcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
dump_to_rootfs()
|
||||
{
|
||||
|
||||
echo "Kdump: trying to bring up rootfs device"
|
||||
dinfo "Trying to bring up rootfs device"
|
||||
systemctl start dracut-initqueue
|
||||
echo "Kdump: waiting for rootfs mount, will timeout after 90 seconds"
|
||||
dinfo "Waiting for rootfs mount, will timeout after 90 seconds"
|
||||
systemctl start sysroot.mount
|
||||
|
||||
ddebug "NEWROOT=$NEWROOT"
|
||||
|
||||
dump_fs $NEWROOT
|
||||
}
|
||||
|
||||
kdump_emergency_shell()
|
||||
{
|
||||
echo "PS1=\"kdump:\\\${PWD}# \"" >/etc/profile
|
||||
ddebug "Switching to dracut emergency..."
|
||||
/bin/dracut-emergency
|
||||
rm -f /etc/profile
|
||||
}
|
||||
|
||||
do_failure_action()
|
||||
{
|
||||
echo "Kdump: Executing failure action $FAILURE_ACTION"
|
||||
dinfo "Executing failure action $FAILURE_ACTION"
|
||||
eval $FAILURE_ACTION
|
||||
}
|
||||
|
||||
do_final_action()
|
||||
{
|
||||
dinfo "Executing final action $FINAL_ACTION"
|
||||
eval $FINAL_ACTION
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user