Improve debugging in the kdump kernel

Let's use the logger in the second kernel and collect the kernel ring
buffer (dmesg) of the second kernel.

Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Acked-by: Kairui Song <kasong@redhat.com>
This commit is contained in:
Lianbo Jiang 2020-10-27 17:04:25 +08:00 committed by Kairui Song
parent 88a8b94de9
commit d7054f4cd8
3 changed files with 94 additions and 44 deletions

View File

@ -5,7 +5,6 @@ if [ -f /etc/fadump.initramfs ] && [ ! -f /proc/device-tree/rtas/ibm,kernel-dump
exit 0 exit 0
fi fi
exec &> /dev/console
. /lib/dracut-lib.sh . /lib/dracut-lib.sh
. /lib/kdump-lib-initramfs.sh . /lib/kdump-lib-initramfs.sh
@ -22,7 +21,7 @@ do_dump()
_ret=$? _ret=$?
if [ $_ret -ne 0 ]; then if [ $_ret -ne 0 ]; then
echo "kdump: saving vmcore failed" derror "saving vmcore failed"
fi fi
return $_ret return $_ret
@ -36,7 +35,7 @@ do_kdump_pre()
"$KDUMP_PRE" "$KDUMP_PRE"
_ret=$? _ret=$?
if [ $_ret -ne 0 ]; then if [ $_ret -ne 0 ]; then
echo "kdump: $KDUMP_PRE exited with $_ret status" derror "$KDUMP_PRE exited with $_ret status"
return $_ret return $_ret
fi fi
fi fi
@ -47,7 +46,7 @@ do_kdump_pre()
"$file" "$file"
_ret=$? _ret=$?
if [ $_ret -ne 0 ]; then if [ $_ret -ne 0 ]; then
echo "kdump: $file exited with $_ret status" derror "$file exited with $_ret status"
fi fi
done done
fi fi
@ -63,7 +62,7 @@ do_kdump_post()
"$file" "$1" "$file" "$1"
_ret=$? _ret=$?
if [ $_ret -ne 0 ]; then if [ $_ret -ne 0 ]; then
echo "kdump: $file exited with $_ret status" derror "$file exited with $_ret status"
fi fi
done done
fi fi
@ -72,7 +71,7 @@ do_kdump_post()
"$KDUMP_POST" "$1" "$KDUMP_POST" "$1"
_ret=$? _ret=$?
if [ $_ret -ne 0 ]; then if [ $_ret -ne 0 ]; then
echo "kdump: $KDUMP_POST exited with $_ret status" derror "$KDUMP_POST exited with $_ret status"
fi fi
fi fi
} }
@ -88,7 +87,7 @@ dump_raw()
[ -b "$_raw" ] || return 1 [ -b "$_raw" ] || return 1
echo "kdump: saving to raw disk $_raw" dinfo "saving to raw disk $_raw"
if ! $(echo -n $CORE_COLLECTOR|grep -q makedumpfile); then if ! $(echo -n $CORE_COLLECTOR|grep -q makedumpfile); then
_src_size=`ls -l /proc/vmcore | cut -d' ' -f5` _src_size=`ls -l /proc/vmcore | cut -d' ' -f5`
@ -96,21 +95,22 @@ dump_raw()
monitor_dd_progress $_src_size_mb & monitor_dd_progress $_src_size_mb &
fi fi
echo "kdump: saving vmcore" dinfo "saving vmcore"
$CORE_COLLECTOR /proc/vmcore | dd of=$_raw bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1 $CORE_COLLECTOR /proc/vmcore | dd of=$_raw bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1
sync sync
echo "kdump: saving vmcore complete" dinfo "saving vmcore complete"
return 0 return 0
} }
dump_ssh() dump_ssh()
{ {
local ret
local _opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes" local _opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes"
local _dir="$KDUMP_PATH/$HOST_IP-$DATEDIR" local _dir="$KDUMP_PATH/$HOST_IP-$DATEDIR"
local _host=$2 local _host=$2
echo "kdump: saving to $_host:$_dir" dinfo "saving to $_host:$_dir"
cat /var/lib/random-seed > /dev/urandom cat /var/lib/random-seed > /dev/urandom
ssh -q $_opt $_host mkdir -p $_dir || return 1 ssh -q $_opt $_host mkdir -p $_dir || return 1
@ -118,17 +118,29 @@ dump_ssh()
save_vmcore_dmesg_ssh ${DMESG_COLLECTOR} ${_dir} "${_opt}" $_host save_vmcore_dmesg_ssh ${DMESG_COLLECTOR} ${_dir} "${_opt}" $_host
save_opalcore_ssh ${_dir} "${_opt}" $_host save_opalcore_ssh ${_dir} "${_opt}" $_host
echo "kdump: saving vmcore" dinfo "saving vmcore"
if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then
scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete" || return 1 scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete"
ret=$?
save_log
scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/"
if [ $ret -ne 0 ]; then
return 1
fi
ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore" || return 1 ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore" || return 1
else else
$CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "dd bs=512 of=$_dir/vmcore-incomplete" || return 1 $CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "dd bs=512 of=$_dir/vmcore-incomplete"
ret=$?
save_log
scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/"
if [ $ret -ne 0 ]; then
return 1
fi
ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore.flat" || return 1 ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/vmcore.flat" || return 1
fi fi
echo "kdump: saving vmcore complete" dinfo "saving vmcore complete"
return 0 return 0
} }
@ -137,6 +149,8 @@ save_opalcore_ssh() {
local _opts="$2" local _opts="$2"
local _location=$3 local _location=$3
ddebug "_path=$_path _opts=$_opts _location=$_location"
if [ ! -f $OPALCORE ]; then if [ ! -f $OPALCORE ]; then
# Check if we are on an old kernel that uses a different path # Check if we are on an old kernel that uses a different path
if [ -f /sys/firmware/opal/core ]; then if [ -f /sys/firmware/opal/core ]; then
@ -146,15 +160,15 @@ save_opalcore_ssh() {
fi fi
fi fi
echo "kdump: saving opalcore" dinfo "saving opalcore:$OPALCORE to $_location:$_path"
scp $_opts $OPALCORE $_location:$_path/opalcore-incomplete scp $_opts $OPALCORE $_location:$_path/opalcore-incomplete
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: saving opalcore failed" derror "saving opalcore failed"
return 1 return 1
fi fi
ssh $_opts $_location mv $_path/opalcore-incomplete $_path/opalcore ssh $_opts $_location mv $_path/opalcore-incomplete $_path/opalcore
echo "kdump: saving opalcore complete" dinfo "saving opalcore complete"
return 0 return 0
} }
@ -164,15 +178,15 @@ save_vmcore_dmesg_ssh() {
local _opts="$3" local _opts="$3"
local _location=$4 local _location=$4
echo "kdump: saving vmcore-dmesg.txt" dinfo "saving vmcore-dmesg.txt to $_location:$_path"
$_dmesg_collector /proc/vmcore | ssh $_opts $_location "dd of=$_path/vmcore-dmesg-incomplete.txt" $_dmesg_collector /proc/vmcore | ssh $_opts $_location "dd of=$_path/vmcore-dmesg-incomplete.txt"
_exitcode=$? _exitcode=$?
if [ $_exitcode -eq 0 ]; then if [ $_exitcode -eq 0 ]; then
ssh -q $_opts $_location mv $_path/vmcore-dmesg-incomplete.txt $_path/vmcore-dmesg.txt ssh -q $_opts $_location mv $_path/vmcore-dmesg-incomplete.txt $_path/vmcore-dmesg.txt
echo "kdump: saving vmcore-dmesg.txt complete" dinfo "saving vmcore-dmesg.txt complete"
else else
echo "kdump: saving vmcore-dmesg.txt failed" derror "saving vmcore-dmesg.txt failed"
fi fi
} }
@ -182,12 +196,12 @@ get_host_ip()
if is_nfs_dump_target || is_ssh_dump_target if is_nfs_dump_target || is_ssh_dump_target
then then
kdumpnic=$(getarg kdumpnic=) kdumpnic=$(getarg kdumpnic=)
[ -z "$kdumpnic" ] && echo "kdump: failed to get kdumpnic!" && return 1 [ -z "$kdumpnic" ] && derror "failed to get kdumpnic!" && return 1
_host=`ip addr show dev $kdumpnic|grep '[ ]*inet'` _host=`ip addr show dev $kdumpnic|grep '[ ]*inet'`
[ $? -ne 0 ] && echo "kdump: wrong kdumpnic: $kdumpnic" && return 1 [ $? -ne 0 ] && derror "wrong kdumpnic: $kdumpnic" && return 1
_host=`echo $_host | head -n 1 | cut -d' ' -f2` _host=`echo $_host | head -n 1 | cut -d' ' -f2`
_host="${_host%%/*}" _host="${_host%%/*}"
[ -z "$_host" ] && echo "kdump: wrong kdumpnic: $kdumpnic" && return 1 [ -z "$_host" ] && derror "wrong kdumpnic: $kdumpnic" && return 1
HOST_IP=$_host HOST_IP=$_host
fi fi
return 0 return 0
@ -196,7 +210,7 @@ get_host_ip()
read_kdump_conf() read_kdump_conf()
{ {
if [ ! -f "$KDUMP_CONF" ]; then if [ ! -f "$KDUMP_CONF" ]; then
echo "kdump: $KDUMP_CONF not found" derror "$KDUMP_CONF not found"
return return
fi fi
@ -240,7 +254,7 @@ fence_kdump_notify
get_host_ip get_host_ip
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: get_host_ip exited with non-zero status!" derror "get_host_ip exited with non-zero status!"
exit 1 exit 1
fi fi
@ -250,7 +264,7 @@ fi
do_kdump_pre do_kdump_pre
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: kdump_pre script exited with non-zero status!" derror "kdump_pre script exited with non-zero status!"
do_final_action do_final_action
# During systemd service to reboot the machine, stop this shell script running # During systemd service to reboot the machine, stop this shell script running
exit 1 exit 1
@ -261,7 +275,7 @@ DUMP_RETVAL=$?
do_kdump_post $DUMP_RETVAL do_kdump_post $DUMP_RETVAL
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: kdump_post script exited with non-zero status!" derror "kdump_post script exited with non-zero status!"
fi fi
if [ $DUMP_RETVAL -ne 0 ]; then if [ $DUMP_RETVAL -ne 0 ]; then

View File

@ -818,6 +818,7 @@ install() {
kdump_install_random_seed kdump_install_random_seed
fi fi
dracut_install -o /etc/adjtime /etc/localtime dracut_install -o /etc/adjtime /etc/localtime
inst_simple "/etc/sysconfig/kdump"
inst "$moddir/monitor_dd_progress" "/kdumpscripts/monitor_dd_progress" inst "$moddir/monitor_dd_progress" "/kdumpscripts/monitor_dd_progress"
chmod +x ${initdir}/kdumpscripts/monitor_dd_progress chmod +x ${initdir}/kdumpscripts/monitor_dd_progress
inst "/bin/dd" "/bin/dd" inst "/bin/dd" "/bin/dd"
@ -830,8 +831,11 @@ install() {
inst "/bin/sed" "/bin/sed" inst "/bin/sed" "/bin/sed"
inst "/sbin/makedumpfile" "/sbin/makedumpfile" inst "/sbin/makedumpfile" "/sbin/makedumpfile"
inst "/sbin/vmcore-dmesg" "/sbin/vmcore-dmesg" inst "/sbin/vmcore-dmesg" "/sbin/vmcore-dmesg"
inst "/usr/bin/printf" "/sbin/printf"
inst "/usr/bin/logger" "/sbin/logger"
inst "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh" inst "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh"
inst "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump-lib-initramfs.sh" inst "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump-lib-initramfs.sh"
inst "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh"
inst "$moddir/kdump.sh" "/usr/bin/kdump.sh" inst "$moddir/kdump.sh" "/usr/bin/kdump.sh"
inst "$moddir/kdump-capture.service" "$systemdsystemunitdir/kdump-capture.service" inst "$moddir/kdump-capture.service" "$systemdsystemunitdir/kdump-capture.service"
mkdir -p "$initdir/$systemdsystemunitdir/initrd.target.wants" mkdir -p "$initdir/$systemdsystemunitdir/initrd.target.wants"

View File

@ -1,8 +1,10 @@
# These variables and functions are useful in 2nd kernel # These variables and functions are useful in 2nd kernel
. /etc/sysconfig/kdump
. /lib/kdump-lib.sh . /lib/kdump-lib.sh
KDUMP_PATH="/var/crash" KDUMP_PATH="/var/crash"
KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log"
CORE_COLLECTOR="" CORE_COLLECTOR=""
DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 1 -d 31" DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 1 -d 31"
DMESG_COLLECTOR="/sbin/vmcore-dmesg" DMESG_COLLECTOR="/sbin/vmcore-dmesg"
@ -20,6 +22,13 @@ KDUMP_POST=""
NEWROOT="/sysroot" NEWROOT="/sysroot"
OPALCORE="/sys/firmware/opal/mpipl/core" OPALCORE="/sys/firmware/opal/mpipl/core"
#initiate the kdump logger
dlog_init
if [ $? -ne 0 ]; then
echo "failed to initiate the kdump logger."
exit 1
fi
get_kdump_confs() get_kdump_confs()
{ {
local config_opt config_val local config_opt config_val
@ -94,27 +103,40 @@ get_kdump_confs()
fi fi
} }
# store the kexec kernel log to a file.
save_log()
{
dmesg -T > $KDUMP_LOG_FILE
if command -v journalctl > /dev/null; then
journalctl -ab >> $KDUMP_LOG_FILE
fi
}
# dump_fs <mount point> # dump_fs <mount point>
dump_fs() dump_fs()
{ {
local ret
local _mp=$1 local _mp=$1
local _dev=$(get_mount_info SOURCE target $_mp -f) local _dev=$(get_mount_info SOURCE target $_mp -f)
local _op=$(get_mount_info OPTIONS target $_mp -f) local _op=$(get_mount_info OPTIONS target $_mp -f)
ddebug "_mp=$_mp _dev=$_dev _op=$_op"
# If dump path have a corresponding device entry but not mounted, mount it. # If dump path have a corresponding device entry but not mounted, mount it.
if [ -n "$_dev" ]; then if [ -n "$_dev" ]; then
if ! is_mounted "$_mp"; then if ! is_mounted "$_mp"; then
echo "kdump: dump target $_dev is not mounted, trying to mount..." dinfo "dump target $_dev is not mounted, trying to mount..."
mkdir -p $_mp mkdir -p $_mp
mount -o $_op $_dev $_mp mount -o $_op $_dev $_mp
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: mounting failed (mount point: $_mp, option: $_op)" derror "mounting failed (mount point: $_mp, option: $_op)"
return 1 return 1
fi fi
fi fi
else else
echo "kdump: failed to dump to \"$_mp\", it's not a mount point!" derror "failed to dump to \"$_mp\", it's not a mount point!"
return 1 return 1
fi fi
@ -123,11 +145,11 @@ dump_fs()
local _dump_path=$(echo "$_mp/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /) local _dump_path=$(echo "$_mp/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /)
echo "kdump: saving to $_dump_path" dinfo "saving to $_dump_path"
# Only remount to read-write mode if the dump target is mounted read-only. # Only remount to read-write mode if the dump target is mounted read-only.
if [[ "$_op" = "ro"* ]]; then if [[ "$_op" = "ro"* ]]; then
echo "kdump: Mounting Dump target $_dev in rw mode." dinfo "Mounting Dump target $_dev in rw mode."
mount -o remount,rw $_dev $_mp || return 1 mount -o remount,rw $_dev $_mp || return 1
fi fi
@ -136,12 +158,18 @@ dump_fs()
save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_path" save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_path"
save_opalcore_fs "$_dump_path" save_opalcore_fs "$_dump_path"
echo "kdump: saving vmcore" dinfo "saving vmcore"
$CORE_COLLECTOR /proc/vmcore $_dump_path/vmcore-incomplete || return 1 $CORE_COLLECTOR /proc/vmcore $_dump_path/vmcore-incomplete
ret=$?
save_log
mv $KDUMP_LOG_FILE $_dump_path/
if [ $ret -ne 0 ]; then
return 1
fi
mv $_dump_path/vmcore-incomplete $_dump_path/vmcore mv $_dump_path/vmcore-incomplete $_dump_path/vmcore
sync sync
echo "kdump: saving vmcore complete" dinfo "saving vmcore complete"
# improper kernel cmdline can cause the failure of echo, we can ignore this kind of failure # improper kernel cmdline can cause the failure of echo, we can ignore this kind of failure
return 0 return 0
@ -151,7 +179,7 @@ save_vmcore_dmesg_fs() {
local _dmesg_collector=$1 local _dmesg_collector=$1
local _path=$2 local _path=$2
echo "kdump: saving vmcore-dmesg.txt" dinfo "saving vmcore-dmesg.txt to ${_path}"
$_dmesg_collector /proc/vmcore > ${_path}/vmcore-dmesg-incomplete.txt $_dmesg_collector /proc/vmcore > ${_path}/vmcore-dmesg-incomplete.txt
_exitcode=$? _exitcode=$?
if [ $_exitcode -eq 0 ]; then if [ $_exitcode -eq 0 ]; then
@ -161,9 +189,9 @@ save_vmcore_dmesg_fs() {
# saving vmcore failed and system rebooted without sync and there # saving vmcore failed and system rebooted without sync and there
# was no vmcore-dmesg.txt available. # was no vmcore-dmesg.txt available.
sync sync
echo "kdump: saving vmcore-dmesg.txt complete" dinfo "saving vmcore-dmesg.txt complete"
else else
echo "kdump: saving vmcore-dmesg.txt failed" derror "saving vmcore-dmesg.txt failed"
fi fi
} }
@ -179,43 +207,47 @@ save_opalcore_fs() {
fi fi
fi fi
echo "kdump: saving opalcore" dinfo "saving opalcore:$OPALCORE to ${_path}/opalcore"
cp $OPALCORE ${_path}/opalcore cp $OPALCORE ${_path}/opalcore
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "kdump: saving opalcore failed" derror "saving opalcore failed"
return 1 return 1
fi fi
sync sync
echo "kdump: saving opalcore complete" dinfo "saving opalcore complete"
return 0 return 0
} }
dump_to_rootfs() dump_to_rootfs()
{ {
echo "Kdump: trying to bring up rootfs device" dinfo "Trying to bring up rootfs device"
systemctl start dracut-initqueue systemctl start dracut-initqueue
echo "Kdump: waiting for rootfs mount, will timeout after 90 seconds" dinfo "Waiting for rootfs mount, will timeout after 90 seconds"
systemctl start sysroot.mount systemctl start sysroot.mount
ddebug "NEWROOT=$NEWROOT"
dump_fs $NEWROOT dump_fs $NEWROOT
} }
kdump_emergency_shell() kdump_emergency_shell()
{ {
echo "PS1=\"kdump:\\\${PWD}# \"" >/etc/profile echo "PS1=\"kdump:\\\${PWD}# \"" >/etc/profile
ddebug "Switching to dracut emergency..."
/bin/dracut-emergency /bin/dracut-emergency
rm -f /etc/profile rm -f /etc/profile
} }
do_failure_action() do_failure_action()
{ {
echo "Kdump: Executing failure action $FAILURE_ACTION" dinfo "Executing failure action $FAILURE_ACTION"
eval $FAILURE_ACTION eval $FAILURE_ACTION
} }
do_final_action() do_final_action()
{ {
dinfo "Executing final action $FINAL_ACTION"
eval $FINAL_ACTION eval $FINAL_ACTION
} }