Compare commits
No commits in common. "c8" and "c9-beta" have entirely different histories.
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,3 +1,3 @@
|
||||
SOURCES/1.7.2.tar.gz
|
||||
SOURCES/eppic_050615.tar.gz
|
||||
SOURCES/kexec-tools-2.0.26.tar.xz
|
||||
SOURCES/eppic-e8844d3.tar.gz
|
||||
SOURCES/kexec-tools-2.0.27.tar.xz
|
||||
SOURCES/makedumpfile-1.7.4.tar.gz
|
||||
|
@ -1,3 +1,3 @@
|
||||
24bce02cd42cdbb960ada4d9e733355582e35784 SOURCES/1.7.2.tar.gz
|
||||
a096c8e0892b559f40b01916aae240652f75b68a SOURCES/eppic_050615.tar.gz
|
||||
27cea5d032ec1e93506b8110222420abf754df2d SOURCES/kexec-tools-2.0.26.tar.xz
|
||||
80ac3f5e77d3c79883edadf14428734db4720009 SOURCES/eppic-e8844d3.tar.gz
|
||||
ed15f191adee22ab0721ba62af1cae67eb981670 SOURCES/kexec-tools-2.0.27.tar.xz
|
||||
98cae2b1062871905795918c32b6d46ccd115074 SOURCES/makedumpfile-1.7.4.tar.gz
|
||||
|
31
SOURCES/60-fadump.install
Executable file
31
SOURCES/60-fadump.install
Executable file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/bash
#
# Install hook that cleans up the fadump default initramfs of a kernel.
#
# Arguments:
#   $1 - command ("add" or "remove"; anything else is a no-op)
#   $2 - kernel version used to build the initramfs file name
# Environment:
#   KERNEL_INSTALL_MACHINE_ID - when set but empty, the hook exits early
# Exit status:
#   0 on success or when there is nothing to do, otherwise the status of
#   the failed removal.

COMMAND="$1"
KERNEL_VERSION="$2"

# ${VAR-x} expands to "x" only when VAR is unset, so this bails out only
# when the variable is set to an empty string.
if ! [[ ${KERNEL_INSTALL_MACHINE_ID-x} ]]; then
    exit 0
fi

# Currently, fadump is supported only in environments with
# writable /boot directory.
if [[ ! -w "/boot" ]]; then
    exit 0
fi

FADUMP_INITRD="/boot/.initramfs-${KERNEL_VERSION}.img.default"
FADUMP_INITRD_CHECKSUM="$FADUMP_INITRD.checksum"

ret=0
case "$COMMAND" in
add)
    # Do nothing, fadump initramfs is strictly host only
    # and managed by kdump service
    ;;
remove)
    # Remove both files with a single rm so that a failure on either of
    # them is reflected in the exit status (previously only the status of
    # the checksum removal was kept).
    rm -f -- "$FADUMP_INITRD" "$FADUMP_INITRD_CHECKSUM"
    ret=$?
    ;;
esac
exit $ret
|
13
SOURCES/92-crashkernel.install
Executable file
13
SOURCES/92-crashkernel.install
Executable file
@ -0,0 +1,13 @@
|
||||
#!/usr/bin/bash
#
# Install hook that asks kdumpctl to reset the crashkernel value for a
# newly added kernel.
#
# Arguments:
#   $1 - command; only "add" triggers any work
#   $2 - kernel version, handed to kdumpctl
#   $3, $4 - captured below but not used by this hook

COMMAND="$1"
KERNEL_VERSION="$2"
KDUMP_INITRD_DIR_ABS="$3"
KERNEL_IMAGE="$4"

if [[ $COMMAND == add ]]; then
    kdumpctl _reset-crashkernel-for-installed_kernel "$KERNEL_VERSION"
    # The hook always reports success for "add", whatever kdumpctl returned.
    exit 0
fi
|
@ -17,6 +17,6 @@ GOTO="kdump_reload_end"
|
||||
|
||||
LABEL="kdump_reload_cpu"
|
||||
|
||||
RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; ! test -f /sys/kernel/fadump_enabled || cat /sys/kernel/fadump_enabled | grep 0 || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'"
|
||||
RUN+="/bin/sh -c '/usr/bin/systemctl is-active kdump.service || exit 0; ! test -f /sys/kernel/fadump/enabled || cat /sys/kernel/fadump/enabled | grep 0 || exit 0; /usr/bin/systemd-run --quiet --no-block /usr/lib/udev/kdump-udev-throttler'"
|
||||
|
||||
LABEL="kdump_reload_end"
|
||||
|
120
SOURCES/crashkernel-howto.txt
Normal file
120
SOURCES/crashkernel-howto.txt
Normal file
@ -0,0 +1,120 @@
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document describes features the kexec-tools package provides for setting
|
||||
and estimating the crashkernel value.
|
||||
|
||||
Kdump lives in a pre-reserved chunk of memory, and the size of the reserved
|
||||
memory is specified by the `crashkernel=` kernel parameter. It's hard to
|
||||
estimate an accurate `crashkernel=` value, so it's always recommended to test
|
||||
kdump after you update the `crashkernel=` value or change the dump target.
|
||||
|
||||
|
||||
Default crashkernel value
|
||||
=========================
|
||||
|
||||
Latest kexec-tools provides "kdumpctl get-default-crashkernel" to retrieve
|
||||
the default crashkernel value:
|
||||
|
||||
$ echo $(kdumpctl get-default-crashkernel)
|
||||
1G-4G:192M,4G-64G:256M,64G-:512M
|
||||
|
||||
It will be taken as the default value of 'crashkernel='; you can use
|
||||
this value as a reference for setting crashkernel value manually.
|
||||
|
||||
|
||||
New installed system
|
||||
====================
|
||||
|
||||
Anaconda is the OS installer which sets all the kernel boot cmdline on a newly
|
||||
installed system. If kdump is enabled during Anaconda installation, Anaconda
|
||||
will use the default crashkernel value as the default `crashkernel=` value on
|
||||
the newly installed system.
|
||||
|
||||
Users can override the value during Anaconda installation manually.
|
||||
|
||||
|
||||
Auto update of crashkernel boot parameter
|
||||
=========================================
|
||||
|
||||
A new release of kexec-tools could update the default crashkernel value. By
|
||||
default, kexec-tools would reset crashkernel to the new default value if it
|
||||
detects the old default crashkernel value is used by installed kernels. If you
|
||||
don't want kexec-tools to update the old default crashkernel to the new default
|
||||
crashkernel, you can change auto_reset_crashkernel to no in kdump.conf.
|
||||
|
||||
Supported Bootloaders
|
||||
---------------------
|
||||
|
||||
This auto update only works with GRUB2 and ZIPL, as kexec-tools heavily depends
|
||||
on `grubby`. If other boot loaders are used, the user will have to update the
|
||||
`crashkernel=` value manually.
|
||||
|
||||
|
||||
Reset crashkernel to default value
|
||||
==================================
|
||||
|
||||
kexec-tools only performs the auto update of the crashkernel value when it can
|
||||
confirm the boot kernel's crashkernel value is using its corresponding default
|
||||
value and auto_reset_crashkernel=yes in kdump.conf. In other cases, the user
|
||||
can reset the crashkernel value by themselves.
|
||||
|
||||
Reset using kdumpctl
|
||||
--------------------
|
||||
|
||||
To make it easier to reset the `crashkernel=` kernel cmdline to this default
|
||||
value properly, `kdumpctl` also provides a sub-command:
|
||||
|
||||
`kdumpctl reset-crashkernel [--kernel=path_to_kernel] [--reboot]`
|
||||
|
||||
This command will reset the bootloader's kernel cmdline to the default value.
|
||||
It will also update bootloader config if the bootloader has a standalone config
|
||||
file. The user will have to reboot the machine after this command to make it take
|
||||
effect if --reboot is not specified. For more details, please refer to the
|
||||
reset-crashkernel command in `man kdumpctl`.
|
||||
|
||||
Reset manually
|
||||
--------------
|
||||
|
||||
To reset the crashkernel value manually, it's recommended to use utils like
|
||||
`grubby`. A one liner script for resetting `crashkernel=` value of all installed
|
||||
kernels to the default value is:
|
||||
|
||||
grubby --update-kernel ALL --args "crashkernel=$(kdumpctl get-default-crashkernel)"
|
||||
|
||||
NOTE: On s390x you also need to run zipl for the change to take effect.
|
||||
|
||||
Estimate crashkernel
|
||||
====================
|
||||
|
||||
The best way to estimate a usable crashkernel value is by testing kdump
|
||||
manually. You can start with a large crashkernel value, then adjust the
|
||||
crashkernel value to an acceptable value gradually.
|
||||
|
||||
`kdumpctl` also provides a sub-command for making a rough estimate without
|
||||
triggering kdump:
|
||||
|
||||
`kdumpctl estimate`
|
||||
|
||||
The output will be like this:
|
||||
|
||||
```
|
||||
Encrypted kdump target requires extra memory, assuming using the keyslot with minimum memory requirement
|
||||
|
||||
Reserved crashkernel: 256M
|
||||
Recommended crashkernel: 655M
|
||||
|
||||
Kernel image size: 47M
|
||||
Kernel modules size: 12M
|
||||
Initramfs size: 19M
|
||||
Runtime reservation: 64M
|
||||
LUKS required size: 512M
|
||||
Large modules:
|
||||
xfs: 1892352
|
||||
nouveau: 2318336
|
||||
WARNING: Current crashkernel size is lower than recommended size 655M.
|
||||
```
|
||||
|
||||
It will generate a summary report about the estimated memory consumption
|
||||
of each component of kdump. The value may not be accurate enough, but
|
||||
would be a good start for finding a suitable crashkernel value.
|
@ -6,9 +6,8 @@ KDUMP_KERNEL=""
|
||||
KDUMP_INITRD=""
|
||||
|
||||
check() {
|
||||
if [ ! -f /etc/sysconfig/kdump ] || [ ! -f /lib/kdump/kdump-lib.sh ]\
|
||||
|| [ -n "${IN_KDUMP}" ]
|
||||
then
|
||||
if [[ ! -f /etc/sysconfig/kdump ]] || [[ ! -f /lib/kdump/kdump-lib.sh ]] \
|
||||
|| [[ -n ${IN_KDUMP} ]]; then
|
||||
return 1
|
||||
fi
|
||||
return 255
|
||||
@ -25,7 +24,7 @@ prepare_kernel_initrd() {
|
||||
prepare_kdump_bootinfo
|
||||
|
||||
# $kernel is a variable from dracut
|
||||
if [ "$KDUMP_KERNELVER" != $kernel ]; then
|
||||
if [[ $KDUMP_KERNELVER != "$kernel" ]]; then
|
||||
dwarn "Using kernel version '$KDUMP_KERNELVER' for early kdump," \
|
||||
"but the initramfs is generated for kernel version '$kernel'"
|
||||
fi
|
||||
@ -33,12 +32,12 @@ prepare_kernel_initrd() {
|
||||
|
||||
install() {
|
||||
prepare_kernel_initrd
|
||||
if [ ! -f "$KDUMP_KERNEL" ]; then
|
||||
if [[ ! -f $KDUMP_KERNEL ]]; then
|
||||
derror "Could not find required kernel for earlykdump," \
|
||||
"earlykdump will not work!"
|
||||
return 1
|
||||
fi
|
||||
if [ ! -f "$KDUMP_INITRD" ]; then
|
||||
if [[ ! -f $KDUMP_INITRD ]]; then
|
||||
derror "Could not find required kdump initramfs for earlykdump," \
|
||||
"please ensure kdump initramfs is generated first," \
|
||||
"earlykdump will not work!"
|
||||
@ -51,7 +50,9 @@ install() {
|
||||
inst_binary "/usr/bin/gawk" "/usr/bin/awk"
|
||||
inst_binary "/usr/bin/logger" "/usr/bin/logger"
|
||||
inst_binary "/usr/bin/printf" "/usr/bin/printf"
|
||||
inst_binary "/usr/bin/xargs" "/usr/bin/xargs"
|
||||
inst_script "/lib/kdump/kdump-lib.sh" "/lib/kdump-lib.sh"
|
||||
inst_script "/lib/kdump/kdump-lib-initramfs.sh" "/lib/kdump/kdump-lib-initramfs.sh"
|
||||
inst_script "/lib/kdump/kdump-logger.sh" "/lib/kdump-logger.sh"
|
||||
inst_hook cmdline 00 "$moddir/early-kdump.sh"
|
||||
inst_binary "$KDUMP_KERNEL"
|
||||
|
@ -14,9 +14,8 @@ EARLY_KEXEC_ARGS=""
|
||||
. /lib/kdump-lib.sh
|
||||
. /lib/kdump-logger.sh
|
||||
|
||||
#initiate the kdump logger
|
||||
dlog_init
|
||||
if [ $? -ne 0 ]; then
|
||||
# initiate the kdump logger
|
||||
if ! dlog_init; then
|
||||
echo "failed to initiate the kdump logger."
|
||||
exit 1
|
||||
fi
|
||||
@ -30,8 +29,7 @@ prepare_parameters()
|
||||
|
||||
early_kdump_load()
|
||||
{
|
||||
check_kdump_feasibility
|
||||
if [ $? -ne 0 ]; then
|
||||
if ! check_kdump_feasibility; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -40,8 +38,7 @@ early_kdump_load()
|
||||
return 1
|
||||
fi
|
||||
|
||||
check_current_kdump_status
|
||||
if [ $? == 0 ]; then
|
||||
if is_kernel_loaded "kdump"; then
|
||||
return 1
|
||||
fi
|
||||
|
||||
@ -56,10 +53,9 @@ early_kdump_load()
|
||||
--command-line=$EARLY_KDUMP_CMDLINE --initrd=$EARLY_KDUMP_INITRD \
|
||||
$EARLY_KDUMP_KERNEL"
|
||||
|
||||
$KEXEC ${EARLY_KEXEC_ARGS} $standard_kexec_args \
|
||||
if $KEXEC $EARLY_KEXEC_ARGS $standard_kexec_args \
|
||||
--command-line="$EARLY_KDUMP_CMDLINE" \
|
||||
--initrd=$EARLY_KDUMP_INITRD $EARLY_KDUMP_KERNEL
|
||||
if [ $? == 0 ]; then
|
||||
--initrd=$EARLY_KDUMP_INITRD $EARLY_KDUMP_KERNEL; then
|
||||
dinfo "kexec: loaded early-kdump kernel"
|
||||
return 0
|
||||
else
|
||||
|
@ -1,27 +1,26 @@
|
||||
# This file is part of systemd.
|
||||
#
|
||||
# systemd is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This service will be placed in kdump initramfs and replace both the systemd
|
||||
# emergency service and dracut emergency shell. IOW, any emergency will be
|
||||
# kick this service and in turn isolating to kdump error handler.
|
||||
# This service will run the real kdump error handler code. Executing the
|
||||
# failure action configured in kdump.conf
|
||||
|
||||
[Unit]
|
||||
Description=Kdump Emergency
|
||||
Description=Kdump Error Handler
|
||||
DefaultDependencies=no
|
||||
IgnoreOnIsolate=yes
|
||||
After=systemd-vconsole-setup.service
|
||||
Wants=systemd-vconsole-setup.service
|
||||
|
||||
[Service]
|
||||
ExecStart=/usr/bin/systemctl --no-block isolate kdump-error-handler.service
|
||||
Environment=HOME=/
|
||||
Environment=DRACUT_SYSTEMD=1
|
||||
Environment=NEWROOT=/sysroot
|
||||
WorkingDirectory=/
|
||||
ExecStart=/bin/kdump.sh --error-handler
|
||||
ExecStopPost=-/bin/rm -f -- /.console_lock
|
||||
Type=oneshot
|
||||
StandardInput=tty-force
|
||||
StandardOutput=inherit
|
||||
StandardError=inherit
|
||||
KillMode=process
|
||||
IgnoreSIGPIPE=no
|
||||
TasksMax=infinity
|
||||
|
||||
# Bash ignores SIGTERM, so we send SIGHUP instead, to ensure that bash
|
||||
# terminates cleanly.
|
||||
|
@ -1,33 +0,0 @@
|
||||
# This file is part of systemd.
|
||||
#
|
||||
# systemd is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU Lesser General Public License as published by
|
||||
# the Free Software Foundation; either version 2.1 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This service will run the real kdump error handler code. Executing the
|
||||
# failure action configured in kdump.conf
|
||||
|
||||
[Unit]
|
||||
Description=Kdump Error Handler
|
||||
DefaultDependencies=no
|
||||
After=systemd-vconsole-setup.service
|
||||
Wants=systemd-vconsole-setup.service
|
||||
AllowIsolate=yes
|
||||
|
||||
[Service]
|
||||
Environment=HOME=/
|
||||
Environment=DRACUT_SYSTEMD=1
|
||||
Environment=NEWROOT=/sysroot
|
||||
WorkingDirectory=/
|
||||
ExecStart=/bin/kdump-error-handler.sh
|
||||
Type=oneshot
|
||||
StandardInput=tty-force
|
||||
StandardOutput=inherit
|
||||
StandardError=inherit
|
||||
KillMode=process
|
||||
IgnoreSIGPIPE=no
|
||||
|
||||
# Bash ignores SIGTERM, so we send SIGHUP instead, to ensure that bash
|
||||
# terminates cleanly.
|
||||
KillSignal=SIGHUP
|
@ -1,10 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
. /lib/kdump-lib-initramfs.sh
|
||||
|
||||
set -o pipefail
|
||||
export PATH=$PATH:$KDUMP_SCRIPT_DIR
|
||||
|
||||
get_kdump_confs
|
||||
do_failure_action
|
||||
do_final_action
|
@ -1,324 +1,618 @@
|
||||
#!/bin/sh
|
||||
|
||||
# continue here only if we have to save dump.
|
||||
if [ -f /etc/fadump.initramfs ] && [ ! -f /proc/device-tree/rtas/ibm,kernel-dump ] && [ ! -f /proc/device-tree/ibm,opal/dump/mpipl-boot ]; then
|
||||
exit 0
|
||||
fi
|
||||
#
|
||||
# The main kdump routine in capture kernel, bash may not be the
|
||||
# default shell. Any code added must be POSIX compliant.
|
||||
|
||||
. /lib/dracut-lib.sh
|
||||
. /lib/kdump-logger.sh
|
||||
. /lib/kdump-lib-initramfs.sh
|
||||
|
||||
set -o pipefail
|
||||
#initiate the kdump logger
|
||||
if ! dlog_init; then
|
||||
echo "failed to initiate the kdump logger."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
KDUMP_PATH="/var/crash"
|
||||
KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log"
|
||||
CORE_COLLECTOR=""
|
||||
DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 7 -d 31"
|
||||
DMESG_COLLECTOR="/sbin/vmcore-dmesg"
|
||||
FAILURE_ACTION="systemctl reboot -f"
|
||||
DATEDIR=$(date +%Y-%m-%d-%T)
|
||||
HOST_IP='127.0.0.1'
|
||||
DUMP_INSTRUCTION=""
|
||||
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
|
||||
DD_BLKSIZE=512
|
||||
FINAL_ACTION="systemctl reboot -f"
|
||||
KDUMP_PRE=""
|
||||
KDUMP_POST=""
|
||||
NEWROOT="/sysroot"
|
||||
OPALCORE="/sys/firmware/opal/mpipl/core"
|
||||
KDUMP_CONF_PARSED="/tmp/kdump.conf.$$"
|
||||
|
||||
# POSIX doesn't have pipefail, only apply when using bash
|
||||
# shellcheck disable=SC3040
|
||||
[ -n "$BASH" ] && set -o pipefail
|
||||
|
||||
DUMP_RETVAL=0
|
||||
|
||||
export PATH=$PATH:$KDUMP_SCRIPT_DIR
|
||||
kdump_read_conf > $KDUMP_CONF_PARSED
|
||||
|
||||
# Load the kdump settings from the parsed configuration file.
#
# Reads "option value" pairs line by line from $KDUMP_CONF_PARSED and stores
# the recognised ones in the matching globals (KDUMP_PATH, CORE_COLLECTOR,
# SSH_KEY_LOCATION, KDUMP_PRE, KDUMP_POST, FENCE_KDUMP_ARGS, FENCE_KDUMP_NODES,
# FAILURE_ACTION, FINAL_ACTION). Unknown options and unknown action values are
# ignored, leaving the previous value in place. When no core collector ends up
# configured, $DEFAULT_CORE_COLLECTOR is used, with -F appended for ssh and raw
# dump targets.
get_kdump_confs()
{
    while read -r config_opt config_val; do
        case "$config_opt" in
        path) KDUMP_PATH="$config_val" ;;
        kdump_pre) KDUMP_PRE="$config_val" ;;
        kdump_post) KDUMP_POST="$config_val" ;;
        fence_kdump_args) FENCE_KDUMP_ARGS="$config_val" ;;
        fence_kdump_nodes) FENCE_KDUMP_NODES="$config_val" ;;
        core_collector)
            # an empty value keeps whatever collector is already set
            if [ -n "$config_val" ]; then
                CORE_COLLECTOR="$config_val"
            fi
            ;;
        sshkey)
            # only switch to a key file that actually exists
            [ -f "$config_val" ] && SSH_KEY_LOCATION=$config_val
            ;;
        failure_action | default)
            case $config_val in
            shell) FAILURE_ACTION="kdump_emergency_shell" ;;
            reboot) FAILURE_ACTION="systemctl reboot -f && exit" ;;
            halt) FAILURE_ACTION="halt && exit" ;;
            poweroff) FAILURE_ACTION="systemctl poweroff -f && exit" ;;
            dump_to_rootfs) FAILURE_ACTION="dump_to_rootfs" ;;
            esac
            ;;
        final_action)
            case $config_val in
            reboot) FINAL_ACTION="systemctl reboot -f" ;;
            halt) FINAL_ACTION="halt" ;;
            poweroff) FINAL_ACTION="systemctl poweroff -f" ;;
            esac
            ;;
        esac
    done < "$KDUMP_CONF_PARSED"

    # An explicitly configured collector is used as is.
    [ -z "$CORE_COLLECTOR" ] || return 0

    CORE_COLLECTOR="$DEFAULT_CORE_COLLECTOR"
    if is_ssh_dump_target || is_raw_dump_target; then
        CORE_COLLECTOR="$CORE_COLLECTOR -F"
    fi
}
|
||||
|
||||
# Store the kexec kernel log to a file.
#
# Writes the output of "dmesg -T" to $KDUMP_LOG_FILE, appends the journal of
# the current boot when journalctl is available, and restricts the file to
# mode 600. The path is quoted everywhere so a log location containing
# whitespace or glob characters is handled as a single file name.
save_log()
{
    dmesg -T > "$KDUMP_LOG_FILE"

    if command -v journalctl > /dev/null; then
        journalctl -ab >> "$KDUMP_LOG_FILE"
    fi
    chmod 600 "$KDUMP_LOG_FILE"
}
|
||||
|
||||
# Save the vmcore and its companion files below a mounted filesystem.
#
# $1: dump path, must be a mount point
#
# Returns 0 when the core collector succeeded and the vmcore was synced and
# renamed; 1 when mounting, remounting, creating the directory or syncing
# failed, or when the core collector itself failed (the log is still saved
# in that last case).
dump_fs()
{
    ddebug "dump_fs _mp=$1"

    if ! is_mounted "$1"; then
        dinfo "dump path '$1' is not mounted, trying to mount..."
        mount --target "$1" || {
            derror "failed to dump to '$1', it's not a mount point!"
            return 1
        }
    fi

    # A flat format dump is not wanted here, so drop -F from a makedumpfile
    # command line.
    case $CORE_COLLECTOR in
    *makedumpfile*) CORE_COLLECTOR=$(echo "$CORE_COLLECTOR" | sed -e "s/-F//g") ;;
    esac

    _dump_fs_path=$(echo "$1/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /)
    dinfo "saving to $_dump_fs_path"

    # Remount read-write only when the mount options start with "ro".
    _dump_mnt_op=$(get_mount_info OPTIONS target "$1" -f)
    case $_dump_mnt_op in
    ro*)
        dinfo "Remounting the dump target in rw mode."
        if ! mount -o remount,rw "$1"; then
            return 1
        fi
        ;;
    esac

    if ! mkdir -p "$_dump_fs_path"; then
        return 1
    fi

    save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_fs_path"
    save_opalcore_fs "$_dump_fs_path"

    dinfo "saving vmcore"
    $CORE_COLLECTOR /proc/vmcore "$_dump_fs_path/vmcore-incomplete"
    _dump_exitcode=$?
    if [ $_dump_exitcode -ne 0 ]; then
        derror "saving vmcore failed, exitcode:$_dump_exitcode"
    else
        sync -f "$_dump_fs_path/vmcore-incomplete"
        _sync_exitcode=$?
        if [ $_sync_exitcode -ne 0 ]; then
            derror "sync vmcore failed, exitcode:$_sync_exitcode"
            return 1
        fi
        # the "-incomplete" suffix is only dropped once the data is synced
        mv "$_dump_fs_path/vmcore-incomplete" "$_dump_fs_path/vmcore"
        dinfo "saving vmcore complete"
    fi

    dinfo "saving the $KDUMP_LOG_FILE to $_dump_fs_path/"
    save_log
    mv "$KDUMP_LOG_FILE" "$_dump_fs_path/"

    # The log is saved even after a failed dump; the function result is the
    # core collector's success (0) or failure (1).
    [ $_dump_exitcode -eq 0 ]
}
|
||||
|
||||
# Extract the kernel log of the crashed kernel into the dump directory.
#
# $1: dmesg collector
# $2: dump path
#
# The collector output is first written to vmcore-dmesg-incomplete.txt and
# renamed to vmcore-dmesg.txt only when the collector succeeds. Either file
# ends up with mode 600.
save_vmcore_dmesg_fs()
{
    dinfo "saving vmcore-dmesg.txt to $2"

    if ! $1 /proc/vmcore > "$2/vmcore-dmesg-incomplete.txt"; then
        # keep the partial output, but do not leave it readable by others
        [ -f "$2/vmcore-dmesg-incomplete.txt" ] &&
            chmod 600 "$2/vmcore-dmesg-incomplete.txt"
        derror "saving vmcore-dmesg.txt failed"
        return
    fi

    mv "$2/vmcore-dmesg-incomplete.txt" "$2/vmcore-dmesg.txt"
    chmod 600 "$2/vmcore-dmesg.txt"

    # Make sure file is on disk. There have been instances where later
    # saving vmcore failed and system rebooted without sync and there
    # was no vmcore-dmesg.txt available.
    sync
    dinfo "saving vmcore-dmesg.txt complete"
}
|
||||
|
||||
# Copy the OPAL core file, when one exists, into the dump directory.
#
# $1: dump path
#
# Uses $OPALCORE, falling back to /sys/firmware/opal/core (and updating
# $OPALCORE) when the configured path is not a regular file.
# Returns 0 when the core was copied or when no OPAL core exists at all,
# 1 when the copy failed.
save_opalcore_fs()
{
    # $OPALCORE is quoted throughout so that a path with whitespace or glob
    # characters is tested and copied as one file name.
    if [ ! -f "$OPALCORE" ]; then
        # Check if we are on an old kernel that uses a different path
        if [ -f /sys/firmware/opal/core ]; then
            OPALCORE="/sys/firmware/opal/core"
        else
            return 0
        fi
    fi

    dinfo "saving opalcore:$OPALCORE to $1/opalcore"
    if ! cp "$OPALCORE" "$1/opalcore"; then
        derror "saving opalcore failed"
        return 1
    fi

    sync
    dinfo "saving opalcore complete"
    return 0
}
|
||||
|
||||
# Failure action: mount the root filesystem under /sysroot and dump to it.
#
# Starts dracut-initqueue when systemctl reports it inactive, requests
# sysroot.mount and polls once per second, for at most 90 iterations, until
# /sysroot is mounted.
# Returns 1 when /sysroot is still not mounted afterwards, otherwise the
# result of dump_fs on $NEWROOT.
dump_to_rootfs()
{

    if [ "$(systemctl status dracut-initqueue | sed -n "s/^\s*Active: \(\S*\)\s.*$/\1/p")" = "inactive" ]; then
        dinfo "Trying to bring up initqueue for rootfs mount"
        systemctl start dracut-initqueue
    fi

    dinfo "Clean up dead systemd services"
    systemctl cancel
    dinfo "Waiting for rootfs mount, will timeout after 90 seconds"
    systemctl start --no-block sysroot.mount

    _loop=0
    while [ $_loop -lt 90 ] && ! is_mounted /sysroot; do
        sleep 1
        _loop=$((_loop + 1))
    done

    if ! is_mounted /sysroot; then
        derror "Failed to mount rootfs"
        # A bare "return" would hand back derror's status and could report
        # this failure as success.
        return 1
    fi

    ddebug "NEWROOT=$NEWROOT"
    dump_fs "$NEWROOT"
}
|
||||
|
||||
# Failure action: drop into an interactive emergency shell.
#
# Sources the profile and dracut state when present, runs the "emergency"
# hooks, prints a banner on every console listed in /proc/consoles, starts an
# interactive login shell and removes /.console_lock once that shell exits.
kdump_emergency_shell()
{
    ddebug "Switching to kdump emergency shell..."

    if [ -f /etc/profile ]; then
        . /etc/profile
    fi
    # single quotes on purpose: ${PWD} is expanded by the shell each time the
    # prompt is shown, not here
    export PS1='kdump:${PWD}# '

    . /lib/dracut-lib.sh
    [ -f /dracut-state.sh ] && . /dracut-state.sh 2> /dev/null

    source_conf /etc/conf.d

    if type plymouth > /dev/null 2>&1; then
        plymouth quit
    fi

    source_hook "emergency"
    # the first field of each /proc/consoles line is used as the device name
    # under /dev
    while read -r _tty rest; do
        printf '\n\n%s\n%s\n%s\n%s\n\n\n' \
            'Entering kdump emergency mode.' \
            'Type "journalctl" to view system logs.' \
            'Type "rdsosreport" to generate a sosreport, you can then' \
            'save it elsewhere and attach it to a bug report.' \
            > "/dev/$_tty"
    done < /proc/consoles

    sh -i -l
    /bin/rm -f -- /.console_lock
}
|
||||
|
||||
# Run the configured failure action.
#
# $FAILURE_ACTION holds a shell command line (for example a function name or
# "systemctl reboot -f && exit"); it is logged and then evaluated in the
# current shell. Returns the status of the evaluated command.
do_failure_action()
{
    dinfo "Executing failure action $FAILURE_ACTION"
    # Quoted so the command line reaches eval unchanged; unquoted it would
    # be word-split and glob-expanded before eval parses it again.
    eval "$FAILURE_ACTION"
}
|
||||
|
||||
# Run the configured final action.
#
# $FINAL_ACTION holds a shell command line (for example
# "systemctl reboot -f"); it is logged and then evaluated in the current
# shell. Returns the status of the evaluated command.
do_final_action()
{
    dinfo "Executing final action $FINAL_ACTION"
    # Quoted so the command line reaches eval unchanged; unquoted it would
    # be word-split and glob-expanded before eval parses it again.
    eval "$FINAL_ACTION"
}
|
||||
|
||||
do_dump()
|
||||
{
|
||||
local _ret
|
||||
eval $DUMP_INSTRUCTION
|
||||
_ret=$?
|
||||
|
||||
eval $DUMP_INSTRUCTION
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "saving vmcore failed"
|
||||
fi
|
||||
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "saving vmcore failed"
|
||||
fi
|
||||
|
||||
return $_ret
|
||||
return $_ret
|
||||
}
|
||||
|
||||
do_kdump_pre()
|
||||
{
|
||||
local _ret
|
||||
if [ -n "$KDUMP_PRE" ]; then
|
||||
"$KDUMP_PRE"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$KDUMP_PRE exited with $_ret status"
|
||||
return $_ret
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "$KDUMP_PRE" ]; then
|
||||
"$KDUMP_PRE"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$KDUMP_PRE exited with $_ret status"
|
||||
return $_ret
|
||||
fi
|
||||
fi
|
||||
|
||||
# if any script fails, it just raises warning and continues
|
||||
if [ -d /etc/kdump/pre.d ]; then
|
||||
for file in /etc/kdump/pre.d/*; do
|
||||
"$file"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
return 0
|
||||
# if any script fails, it just raises warning and continues
|
||||
if [ -d /etc/kdump/pre.d ]; then
|
||||
for file in /etc/kdump/pre.d/*; do
|
||||
"$file"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
do_kdump_post()
|
||||
{
|
||||
local _ret
|
||||
if [ -d /etc/kdump/post.d ]; then
|
||||
for file in /etc/kdump/post.d/*; do
|
||||
"$file" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -d /etc/kdump/post.d ]; then
|
||||
for file in /etc/kdump/post.d/*; do
|
||||
"$file" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$file exited with $_ret status"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [ -n "$KDUMP_POST" ]; then
|
||||
"$KDUMP_POST" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$KDUMP_POST exited with $_ret status"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
add_dump_code()
|
||||
{
|
||||
DUMP_INSTRUCTION=$1
|
||||
if [ -n "$KDUMP_POST" ]; then
|
||||
"$KDUMP_POST" "$1"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "$KDUMP_POST exited with $_ret status"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# $1: block target, eg. /dev/sda
|
||||
dump_raw()
|
||||
{
|
||||
local _raw=$1
|
||||
[ -b "$1" ] || return 1
|
||||
|
||||
[ -b "$_raw" ] || return 1
|
||||
dinfo "saving to raw disk $1"
|
||||
|
||||
dinfo "saving to raw disk $_raw"
|
||||
if ! echo "$CORE_COLLECTOR" | grep -q makedumpfile; then
|
||||
_src_size=$(stat --format %s /proc/vmcore)
|
||||
_src_size_mb=$((_src_size / 1048576))
|
||||
/kdumpscripts/monitor_dd_progress $_src_size_mb &
|
||||
fi
|
||||
|
||||
if ! $(echo -n $CORE_COLLECTOR|grep -q makedumpfile); then
|
||||
_src_size=`ls -l /proc/vmcore | cut -d' ' -f5`
|
||||
_src_size_mb=$(($_src_size / 1048576))
|
||||
monitor_dd_progress $_src_size_mb &
|
||||
fi
|
||||
dinfo "saving vmcore"
|
||||
$CORE_COLLECTOR /proc/vmcore | dd of="$1" bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1
|
||||
sync
|
||||
|
||||
dinfo "saving vmcore"
|
||||
$CORE_COLLECTOR /proc/vmcore | dd of=$_raw bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1
|
||||
sync
|
||||
|
||||
dinfo "saving vmcore complete"
|
||||
return 0
|
||||
dinfo "saving vmcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
# $1: ssh key file
|
||||
# $2: ssh address in <user>@<host> format
|
||||
dump_ssh()
|
||||
{
|
||||
local _ret=0
|
||||
local _exitcode=0 _exitcode2=0
|
||||
local _opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes"
|
||||
local _dir="$KDUMP_PATH/$HOST_IP-$DATEDIR"
|
||||
local _host=$2
|
||||
local _vmcore="vmcore"
|
||||
local _ipv6_addr="" _username=""
|
||||
_ret=0
|
||||
_ssh_opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes"
|
||||
_ssh_dir="$KDUMP_PATH/$HOST_IP-$DATEDIR"
|
||||
if is_ipv6_address "$2"; then
|
||||
_scp_address=${2%@*}@"[${2#*@}]"
|
||||
else
|
||||
_scp_address=$2
|
||||
fi
|
||||
|
||||
dinfo "saving to $_host:$_dir"
|
||||
dinfo "saving to $2:$_ssh_dir"
|
||||
|
||||
cat /var/lib/random-seed > /dev/urandom
|
||||
ssh -q $_opt $_host mkdir -p $_dir || return 1
|
||||
cat /var/lib/random-seed > /dev/urandom
|
||||
ssh -q $_ssh_opt "$2" mkdir -p "$_ssh_dir" || return 1
|
||||
|
||||
save_vmcore_dmesg_ssh ${DMESG_COLLECTOR} ${_dir} "${_opt}" $_host
|
||||
save_opalcore_ssh ${_dir} "${_opt}" $_host
|
||||
save_vmcore_dmesg_ssh "$DMESG_COLLECTOR" "$_ssh_dir" "$_ssh_opt" "$2"
|
||||
dinfo "saving vmcore"
|
||||
|
||||
dinfo "saving vmcore"
|
||||
save_opalcore_ssh "$_ssh_dir" "$_ssh_opt" "$2" "$_scp_address"
|
||||
|
||||
if is_ipv6_address "$_host"; then
|
||||
_username=${_host%@*}
|
||||
_ipv6_addr="[${_host#*@}]"
|
||||
fi
|
||||
if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then
|
||||
scp -q $_ssh_opt /proc/vmcore "$_scp_address:$_ssh_dir/vmcore-incomplete"
|
||||
_ret=$?
|
||||
_vmcore="vmcore"
|
||||
else
|
||||
$CORE_COLLECTOR /proc/vmcore | ssh $_ssh_opt "$2" "umask 0077 && dd bs=512 of='$_ssh_dir/vmcore-incomplete'"
|
||||
_ret=$?
|
||||
_vmcore="vmcore.flat"
|
||||
fi
|
||||
|
||||
if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then
|
||||
if [ -n "$_username" ] && [ -n "$_ipv6_addr" ]; then
|
||||
scp -q $_opt /proc/vmcore "$_username@$_ipv6_addr:$_dir/vmcore-incomplete"
|
||||
else
|
||||
scp -q $_opt /proc/vmcore "$_host:$_dir/vmcore-incomplete"
|
||||
fi
|
||||
_exitcode=$?
|
||||
else
|
||||
$CORE_COLLECTOR /proc/vmcore | ssh $_opt $_host "umask 0077 && dd bs=512 of=$_dir/vmcore-incomplete"
|
||||
_exitcode=$?
|
||||
_vmcore="vmcore.flat"
|
||||
fi
|
||||
if [ $_ret -eq 0 ]; then
|
||||
ssh $_ssh_opt "$2" "mv '$_ssh_dir/vmcore-incomplete' '$_ssh_dir/$_vmcore'"
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "moving vmcore failed, exitcode:$_ret"
|
||||
else
|
||||
dinfo "saving vmcore complete"
|
||||
fi
|
||||
else
|
||||
derror "saving vmcore failed, exitcode:$_ret"
|
||||
fi
|
||||
|
||||
if [ $_exitcode -eq 0 ]; then
|
||||
ssh $_opt $_host "mv $_dir/vmcore-incomplete $_dir/$_vmcore"
|
||||
_exitcode2=$?
|
||||
if [ $_exitcode2 -ne 0 ]; then
|
||||
derror "moving vmcore failed, _exitcode:$_exitcode2"
|
||||
else
|
||||
dinfo "saving vmcore complete"
|
||||
fi
|
||||
else
|
||||
derror "saving vmcore failed, _exitcode:$_exitcode"
|
||||
fi
|
||||
dinfo "saving the $KDUMP_LOG_FILE to $2:$_ssh_dir/"
|
||||
save_log
|
||||
if ! scp -q $_ssh_opt $KDUMP_LOG_FILE "$_scp_address:$_ssh_dir/"; then
|
||||
derror "saving log file failed, _exitcode:$_ret"
|
||||
fi
|
||||
|
||||
dinfo "saving the $KDUMP_LOG_FILE to $_host:$_dir/"
|
||||
save_log
|
||||
if [ -n "$_username" ] && [ -n "$_ipv6_addr" ]; then
|
||||
scp -q $_opt $KDUMP_LOG_FILE "$_username@$_ipv6_addr:$_dir/"
|
||||
else
|
||||
scp -q $_opt $KDUMP_LOG_FILE "$_host:$_dir/"
|
||||
fi
|
||||
_ret=$?
|
||||
if [ $_ret -ne 0 ]; then
|
||||
derror "saving log file failed, _exitcode:$_ret"
|
||||
fi
|
||||
|
||||
if [ $_exitcode -ne 0 ] || [ $_exitcode2 -ne 0 ];then
|
||||
return 1
|
||||
fi
|
||||
|
||||
return 0
|
||||
return $_ret
|
||||
}
|
||||
|
||||
save_opalcore_ssh() {
|
||||
local _path=$1
|
||||
local _opts="$2"
|
||||
local _location=$3
|
||||
local _user_name="" _ipv6addr=""
|
||||
# $1: dump path
|
||||
# $2: ssh opts
|
||||
# $3: ssh address in <user>@<host> format
|
||||
# $4: scp address, similar with ssh address but IPv6 addresses are quoted
|
||||
save_opalcore_ssh()
|
||||
{
|
||||
if [ ! -f $OPALCORE ]; then
|
||||
# Check if we are on an old kernel that uses a different path
|
||||
if [ -f /sys/firmware/opal/core ]; then
|
||||
OPALCORE="/sys/firmware/opal/core"
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
ddebug "_path=$_path _opts=$_opts _location=$_location"
|
||||
dinfo "saving opalcore:$OPALCORE to $3:$1"
|
||||
|
||||
if [ ! -f $OPALCORE ]; then
|
||||
# Check if we are on an old kernel that uses a different path
|
||||
if [ -f /sys/firmware/opal/core ]; then
|
||||
OPALCORE="/sys/firmware/opal/core"
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
if ! scp $2 $OPALCORE "$4:$1/opalcore-incomplete"; then
|
||||
derror "saving opalcore failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
if is_ipv6_address "$_host"; then
|
||||
_user_name=${_location%@*}
|
||||
_ipv6addr="[${_location#*@}]"
|
||||
fi
|
||||
|
||||
dinfo "saving opalcore:$OPALCORE to $_location:$_path"
|
||||
|
||||
if [ -n "$_user_name" ] && [ -n "$_ipv6addr" ]; then
|
||||
scp $_opts $OPALCORE $_user_name@$_ipv6addr:$_path/opalcore-incomplete
|
||||
else
|
||||
scp $_opts $OPALCORE $_location:$_path/opalcore-incomplete
|
||||
fi
|
||||
if [ $? -ne 0 ]; then
|
||||
derror "saving opalcore failed"
|
||||
return 1
|
||||
fi
|
||||
|
||||
ssh $_opts $_location mv $_path/opalcore-incomplete $_path/opalcore
|
||||
dinfo "saving opalcore complete"
|
||||
return 0
|
||||
ssh $2 "$3" mv "$1/opalcore-incomplete" "$1/opalcore"
|
||||
dinfo "saving opalcore complete"
|
||||
return 0
|
||||
}
|
||||
|
||||
save_vmcore_dmesg_ssh() {
|
||||
local _dmesg_collector=$1
|
||||
local _path=$2
|
||||
local _opts="$3"
|
||||
local _location=$4
|
||||
# Run the dmesg collector against /proc/vmcore and stream its output over
# ssh into vmcore-dmesg.txt under the dump path on the remote host.
# $1: dmesg collector (expanded unquoted, so it is subject to word splitting)
|
||||
# $2: dump path
|
||||
# $3: ssh opts (expanded unquoted, so several options split into separate words)
|
||||
# $4: ssh address in <user>@<host> format
# No explicit return statement: the status is that of the last command run
# (dinfo on the success path, derror on the failure path).
|
||||
save_vmcore_dmesg_ssh()
|
||||
{
|
||||
dinfo "saving vmcore-dmesg.txt to $4:$2"
|
||||
# The remote side sets umask 0077 before dd creates the file, and the data
# is first written under an "-incomplete" name.
# NOTE(review): the condition sees only the pipeline's exit status, which by
# default is that of the last command (ssh); a failing collector would go
# unnoticed unless pipefail is enabled elsewhere -- confirm.
if $1 /proc/vmcore | ssh $3 "$4" "umask 0077 && dd of='$2/vmcore-dmesg-incomplete.txt'"; then
|
||||
# Rename to the final name only after the transfer pipeline succeeded.
# The exit status of this mv is not checked.
ssh -q $3 "$4" mv "$2/vmcore-dmesg-incomplete.txt" "$2/vmcore-dmesg.txt"
|
||||
dinfo "saving vmcore-dmesg.txt complete"
|
||||
else
|
||||
derror "saving vmcore-dmesg.txt failed"
|
||||
fi
|
||||
}
|
||||
|
||||
dinfo "saving vmcore-dmesg.txt to $_location:$_path"
|
||||
$_dmesg_collector /proc/vmcore | ssh $_opts $_location "umask 0077 && dd of=$_path/vmcore-dmesg-incomplete.txt"
|
||||
_exitcode=$?
|
||||
wait_online_network()
|
||||
{
|
||||
# In some cases, network may still not be ready because nm-online is called
|
||||
# with "-s" which means to wait for NetworkManager startup to complete, rather
|
||||
# than waiting for network connectivity specifically. Wait 10mins more for the
|
||||
# network to be truly ready in these cases.
|
||||
_loop=0
|
||||
while [ $_loop -lt 600 ]; do
|
||||
sleep 1
|
||||
_loop=$((_loop + 1))
|
||||
if _route=$(kdump_get_ip_route "$1" 2> /dev/null); then
|
||||
printf "%s" "$_route"
|
||||
return
|
||||
else
|
||||
dwarn "Waiting for network to be ready (${_loop}s / 10min)"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $_exitcode -eq 0 ]; then
|
||||
ssh -q $_opts $_location mv $_path/vmcore-dmesg-incomplete.txt $_path/vmcore-dmesg.txt
|
||||
dinfo "saving vmcore-dmesg.txt complete"
|
||||
else
|
||||
derror "saving vmcore-dmesg.txt failed"
|
||||
fi
|
||||
derror "Oops. The network still isn't ready after waiting 10mins."
|
||||
exit 1
|
||||
}
|
||||
|
||||
get_host_ip()
|
||||
{
|
||||
local _host
|
||||
if is_nfs_dump_target || is_ssh_dump_target
|
||||
then
|
||||
kdumpnic=$(getarg kdumpnic=)
|
||||
[ -z "$kdumpnic" ] && derror "failed to get kdumpnic!" && return 1
|
||||
_host=`ip addr show dev $kdumpnic|grep '[ ]*inet'`
|
||||
[ $? -ne 0 ] && derror "wrong kdumpnic: $kdumpnic" && return 1
|
||||
_host=`echo $_host | head -n 1 | cut -d' ' -f2`
|
||||
_host="${_host%%/*}"
|
||||
[ -z "$_host" ] && derror "wrong kdumpnic: $kdumpnic" && return 1
|
||||
HOST_IP=$_host
|
||||
fi
|
||||
return 0
|
||||
|
||||
if ! is_nfs_dump_target && ! is_ssh_dump_target; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
_kdump_remote_ip=$(getarg kdump_remote_ip=)
|
||||
|
||||
if [ -z "$_kdump_remote_ip" ]; then
|
||||
derror "failed to get remote IP address!"
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! _route=$(wait_online_network "$_kdump_remote_ip"); then
|
||||
return 1
|
||||
fi
|
||||
|
||||
_netdev=$(kdump_get_ip_route_field "$_route" "dev")
|
||||
|
||||
if ! _kdumpip=$(ip addr show dev "$_netdev" | grep '[ ]*inet'); then
|
||||
derror "Failed to get IP of $_netdev"
|
||||
return 1
|
||||
fi
|
||||
|
||||
_kdumpip=$(echo "$_kdumpip" | head -n 1 | awk '{print $2}')
|
||||
_kdumpip="${_kdumpip%%/*}"
|
||||
HOST_IP=$_kdumpip
|
||||
}
|
||||
|
||||
read_kdump_conf()
|
||||
read_kdump_confs()
|
||||
{
|
||||
if [ ! -f "$KDUMP_CONF" ]; then
|
||||
derror "$KDUMP_CONF not found"
|
||||
return
|
||||
fi
|
||||
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
|
||||
derror "$KDUMP_CONFIG_FILE not found"
|
||||
return
|
||||
fi
|
||||
|
||||
get_kdump_confs
|
||||
get_kdump_confs
|
||||
|
||||
# rescan for add code for dump target
|
||||
while read config_opt config_val;
|
||||
do
|
||||
# remove inline comments after the end of a directive.
|
||||
case "$config_opt" in
|
||||
dracut_args)
|
||||
config_val=$(get_dracut_args_target "$config_val")
|
||||
if [ -n "$config_val" ]; then
|
||||
config_val=$(get_mntpoint_from_target "$config_val")
|
||||
add_dump_code "dump_fs $config_val"
|
||||
fi
|
||||
;;
|
||||
ext[234]|xfs|btrfs|minix|nfs)
|
||||
config_val=$(get_mntpoint_from_target "$config_val")
|
||||
add_dump_code "dump_fs $config_val"
|
||||
;;
|
||||
raw)
|
||||
add_dump_code "dump_raw $config_val"
|
||||
;;
|
||||
ssh)
|
||||
add_dump_code "dump_ssh $SSH_KEY_LOCATION $config_val"
|
||||
;;
|
||||
esac
|
||||
done <<< "$(read_strip_comments $KDUMP_CONF)"
|
||||
# rescan for add code for dump target
|
||||
while read -r config_opt config_val; do
|
||||
# remove inline comments after the end of a directive.
|
||||
case "$config_opt" in
|
||||
dracut_args)
|
||||
config_val=$(get_dracut_args_target "$config_val")
|
||||
if [ -n "$config_val" ]; then
|
||||
config_val=$(get_mntpoint_from_target "$config_val")
|
||||
DUMP_INSTRUCTION="dump_fs $config_val"
|
||||
fi
|
||||
;;
|
||||
ext[234] | xfs | btrfs | minix | nfs | virtiofs)
|
||||
config_val=$(get_mntpoint_from_target "$config_val")
|
||||
DUMP_INSTRUCTION="dump_fs $config_val"
|
||||
;;
|
||||
raw)
|
||||
DUMP_INSTRUCTION="dump_raw $config_val"
|
||||
;;
|
||||
ssh)
|
||||
DUMP_INSTRUCTION="dump_ssh $SSH_KEY_LOCATION $config_val"
|
||||
;;
|
||||
esac
|
||||
done < "$KDUMP_CONF_PARSED"
|
||||
}
|
||||
|
||||
fence_kdump_notify()
|
||||
{
|
||||
if [ -n "$FENCE_KDUMP_NODES" ]; then
|
||||
$FENCE_KDUMP_SEND $FENCE_KDUMP_ARGS $FENCE_KDUMP_NODES &
|
||||
fi
|
||||
if [ -n "$FENCE_KDUMP_NODES" ]; then
|
||||
# shellcheck disable=SC2086
|
||||
$FENCE_KDUMP_SEND $FENCE_KDUMP_ARGS $FENCE_KDUMP_NODES &
|
||||
fi
|
||||
}
|
||||
|
||||
read_kdump_conf
|
||||
if [ "$1" = "--error-handler" ]; then
|
||||
get_kdump_confs
|
||||
do_failure_action
|
||||
do_final_action
|
||||
|
||||
exit $?
|
||||
fi
|
||||
|
||||
# continue here only if we have to save dump.
|
||||
if [ -f /etc/fadump.initramfs ] && [ ! -f /proc/device-tree/rtas/ibm,kernel-dump ] && [ ! -f /proc/device-tree/ibm,opal/dump/mpipl-boot ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
read_kdump_confs
|
||||
fence_kdump_notify
|
||||
|
||||
get_host_ip
|
||||
if [ $? -ne 0 ]; then
|
||||
derror "get_host_ip exited with non-zero status!"
|
||||
exit 1
|
||||
if ! get_host_ip; then
|
||||
derror "get_host_ip exited with non-zero status!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$DUMP_INSTRUCTION" ]; then
|
||||
add_dump_code "dump_fs $NEWROOT"
|
||||
DUMP_INSTRUCTION="dump_fs $NEWROOT"
|
||||
fi
|
||||
|
||||
do_kdump_pre
|
||||
if [ $? -ne 0 ]; then
|
||||
derror "kdump_pre script exited with non-zero status!"
|
||||
do_final_action
|
||||
# During systemd service to reboot the machine, stop this shell script running
|
||||
exit 1
|
||||
if ! do_kdump_pre; then
|
||||
derror "kdump_pre script exited with non-zero status!"
|
||||
do_final_action
|
||||
# During systemd service to reboot the machine, stop this shell script running
|
||||
exit 1
|
||||
fi
|
||||
make_trace_mem "kdump saving vmcore" '1:shortmem' '2+:mem' '3+:slab'
|
||||
do_dump
|
||||
DUMP_RETVAL=$?
|
||||
|
||||
do_kdump_post $DUMP_RETVAL
|
||||
if [ $? -ne 0 ]; then
|
||||
derror "kdump_post script exited with non-zero status!"
|
||||
if ! do_kdump_post $DUMP_RETVAL; then
|
||||
derror "kdump_post script exited with non-zero status!"
|
||||
fi
|
||||
|
||||
if [ $DUMP_RETVAL -ne 0 ]; then
|
||||
exit 1
|
||||
exit 1
|
||||
fi
|
||||
|
||||
do_final_action
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -39,7 +39,7 @@ kernel are one and the same on ppc64.
|
||||
If you're reading this document, you should already have kexec-tools
|
||||
installed. If not, you install it via the following command:
|
||||
|
||||
# yum install kexec-tools
|
||||
# dnf install kexec-tools
|
||||
|
||||
Fadump Operational Flow:
|
||||
|
||||
@ -82,7 +82,7 @@ How to configure fadump:
|
||||
Again, we assume if you're reading this document, you should already have
|
||||
kexec-tools installed. If not, you install it via the following command:
|
||||
|
||||
# yum install kexec-tools
|
||||
# dnf install kexec-tools
|
||||
|
||||
Make the kernel to be configured with FADump as the default boot entry, if
|
||||
it isn't already:
|
||||
@ -94,20 +94,24 @@ anything interesting in the way of debug analysis, you'll also need to install
|
||||
the kernel-debuginfo package, of the same arch as your running kernel, and the
|
||||
crash utility:
|
||||
|
||||
# yum --enablerepo=\*debuginfo install kernel-debuginfo.$(uname -m) crash
|
||||
# dnf --enablerepo=\*debuginfo install kernel-debuginfo.$(uname -m) crash
|
||||
|
||||
Next up, we need to modify some boot parameters to enable firmware assisted
|
||||
dump. With the help of grubby, it's very easy to append "fadump=on" to the end
|
||||
of your kernel boot parameters. To reserve the appropriate amount of memory
|
||||
for boot memory preservation, pass 'crashkernel=X' kernel cmdline parameter.
|
||||
For the recommended value of X, see 'FADump Memory Requirements' section.
|
||||
Next up, we can enable firmware assisted dump and reserve the memory for boot
|
||||
memory preservation as specified in the table of 'FADump Memory Requirements'
|
||||
section:
|
||||
|
||||
# kdumpctl reset-crashkernel --fadump=on
|
||||
|
||||
Alternatively, you can use grubby to reserve custom amount of memory:
|
||||
|
||||
# grubby --args="fadump=on crashkernel=6G" --update-kernel=/boot/vmlinuz-`uname -r`
|
||||
|
||||
By default, FADump reserved memory will be initialized as CMA area to make the
|
||||
memory available through CMA allocator on the production kernel. We can opt out
|
||||
of this, making reserved memory unavailable to production kernel, by booting the
|
||||
linux kernel with 'fadump=nocma' instead of 'fadump=on'.
|
||||
linux kernel with 'fadump=nocma' instead of 'fadump=on':
|
||||
|
||||
# kdumpctl reset-crashkernel --fadump=nocma
|
||||
|
||||
The term 'boot memory' means size of the low memory chunk that is required for
|
||||
a kernel to boot successfully when booted with restricted memory. By default,
|
||||
@ -133,7 +137,7 @@ Then, start up kdump as well:
|
||||
# systemctl start kdump.service
|
||||
|
||||
This should turn on the firmware assisted functionality in kernel by
|
||||
echo'ing 1 to /sys/kernel/fadump_registered, leaving the system ready
|
||||
echo'ing 1 to /sys/kernel/fadump/registered, leaving the system ready
|
||||
to capture a vmcore upon crashing. For journaling filesystems like XFS an
|
||||
additional step is required to ensure bootloader does not pick the
|
||||
older initrd (without vmcore capture scripts):
|
||||
@ -344,9 +348,12 @@ or
|
||||
OR
|
||||
# grubby --update-kernel=/boot/vmlinuz-`uname -r` --args="fadump=off"
|
||||
|
||||
If KDump is to be used as the dump capturing mechanism, update the crashkernel
|
||||
parameter (Else, remove "crashkernel=" parameter too, using grubby):
|
||||
Remove "crashkernel=" from kernel cmdline parameters:
|
||||
|
||||
# grubby --update-kernel=/boot/vmlinuz-$kver --args="crashkernl=auto"
|
||||
# grubby --update-kernel=/boot/vmlinuz-`uname -r` --remove-args="crashkernel"
|
||||
|
||||
If KDump is to be used as the dump capturing mechanism, reset the crashkernel parameter:
|
||||
|
||||
# kdumpctl reset-crashkernel --fadump=off
|
||||
|
||||
Reboot the system for the settings to take effect.
|
||||
|
@ -1,7 +1,6 @@
|
||||
#!/bin/bash
|
||||
# $1: target arch
|
||||
|
||||
|
||||
SED_EXP=""
|
||||
|
||||
generate()
|
||||
@ -20,6 +19,12 @@ generate()
|
||||
#
|
||||
# Supported options:
|
||||
#
|
||||
# auto_reset_crashkernel <yes|no>
|
||||
# - whether to reset kernel crashkernel to new default value
|
||||
# or not when kexec-tools updates the default crashkernel value and
|
||||
# existing kernels using the old default kernel crashkernel value.
|
||||
# The default value is yes.
|
||||
#
|
||||
# raw <partition>
|
||||
# - Will dd /proc/vmcore into <partition>.
|
||||
# Use persistent device names for partition devices,
|
||||
@ -41,11 +46,12 @@ generate()
|
||||
#
|
||||
# <fs type> <partition>
|
||||
# - Will mount -t <fs type> <partition> <mnt>, and copy
|
||||
# /proc/vmcore to <mnt>/<path>/%DATE/.
|
||||
# /proc/vmcore to <mnt>/<path>/%HOST_IP-%DATE/.
|
||||
# NOTE: <partition> can be a device node, label or uuid.
|
||||
# It's recommended to use persistent device names
|
||||
# such as /dev/vg/<devname>.
|
||||
# Otherwise it's suggested to use label or uuid.
|
||||
# Supported fs types: ext[234], xfs, btrfs, minix, virtiofs
|
||||
#
|
||||
# path <path>
|
||||
# - "path" represents the file system path in which vmcore
|
||||
@ -174,11 +180,13 @@ generate()
|
||||
#ext4 /dev/vg/lv_kdump
|
||||
#ext4 LABEL=/boot
|
||||
#ext4 UUID=03138356-5e61-4ab3-b58e-27507ac41937
|
||||
#virtiofs myfs
|
||||
#nfs my.server.com:/export/tmp
|
||||
#nfs [2001:db8::1:2:3:4]:/export/tmp
|
||||
#ssh user@my.server.com
|
||||
#ssh user@2001:db8::1:2:3:4
|
||||
#sshkey /root/.ssh/kdump_id_rsa
|
||||
auto_reset_crashkernel yes
|
||||
path /var/crash
|
||||
core_collector makedumpfile -l --message-level 7 -d 31
|
||||
#core_collector scp
|
||||
@ -201,20 +209,20 @@ update_param()
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
aarch64)
|
||||
;;
|
||||
i386)
|
||||
;;
|
||||
ppc64)
|
||||
;;
|
||||
ppc64le)
|
||||
;;
|
||||
aarch64) ;;
|
||||
|
||||
i386) ;;
|
||||
|
||||
ppc64) ;;
|
||||
|
||||
ppc64le) ;;
|
||||
|
||||
s390x)
|
||||
update_param core_collector \
|
||||
"makedumpfile -c --message-level 7 -d 31"
|
||||
;;
|
||||
x86_64)
|
||||
;;
|
||||
x86_64) ;;
|
||||
|
||||
*)
|
||||
echo "Warning: Unknown architecture '$1', using default kdump.conf template."
|
||||
;;
|
||||
|
@ -1,248 +1,184 @@
|
||||
# These variables and functions are useful in 2nd kernel
|
||||
#!/bin/sh
|
||||
#
|
||||
# The code in this file will be used in initramfs environment, bash may
|
||||
# not be the default shell. Any code added must be POSIX compliant.
|
||||
|
||||
. /lib/kdump-lib.sh
|
||||
. /lib/kdump-logger.sh
|
||||
DEFAULT_PATH="/var/crash/"
|
||||
KDUMP_CONFIG_FILE="/etc/kdump.conf"
|
||||
FENCE_KDUMP_CONFIG_FILE="/etc/sysconfig/fence_kdump"
|
||||
FENCE_KDUMP_SEND="/usr/libexec/fence_kdump_send"
|
||||
LVM_CONF="/etc/lvm/lvm.conf"
|
||||
|
||||
KDUMP_PATH="/var/crash"
|
||||
KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log"
|
||||
CORE_COLLECTOR=""
|
||||
DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 7 -d 31"
|
||||
DMESG_COLLECTOR="/sbin/vmcore-dmesg"
|
||||
FAILURE_ACTION="systemctl reboot -f"
|
||||
DATEDIR=`date +%Y-%m-%d-%T`
|
||||
HOST_IP='127.0.0.1'
|
||||
DUMP_INSTRUCTION=""
|
||||
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
|
||||
KDUMP_SCRIPT_DIR="/kdumpscripts"
|
||||
DD_BLKSIZE=512
|
||||
FINAL_ACTION="systemctl reboot -f"
|
||||
KDUMP_CONF="/etc/kdump.conf"
|
||||
KDUMP_PRE=""
|
||||
KDUMP_POST=""
|
||||
NEWROOT="/sysroot"
|
||||
OPALCORE="/sys/firmware/opal/mpipl/core"
|
||||
|
||||
#initiate the kdump logger
|
||||
dlog_init
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "failed to initiate the kdump logger."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_kdump_confs()
|
||||
# Read kdump config in a well-formatted style
|
||||
kdump_read_conf()
|
||||
{
|
||||
local config_opt config_val
|
||||
|
||||
while read config_opt config_val;
|
||||
do
|
||||
# remove inline comments after the end of a directive.
|
||||
case "$config_opt" in
|
||||
path)
|
||||
KDUMP_PATH="$config_val"
|
||||
;;
|
||||
core_collector)
|
||||
[ -n "$config_val" ] && CORE_COLLECTOR="$config_val"
|
||||
;;
|
||||
sshkey)
|
||||
if [ -f "$config_val" ]; then
|
||||
SSH_KEY_LOCATION=$config_val
|
||||
fi
|
||||
;;
|
||||
kdump_pre)
|
||||
KDUMP_PRE="$config_val"
|
||||
;;
|
||||
kdump_post)
|
||||
KDUMP_POST="$config_val"
|
||||
;;
|
||||
fence_kdump_args)
|
||||
FENCE_KDUMP_ARGS="$config_val"
|
||||
;;
|
||||
fence_kdump_nodes)
|
||||
FENCE_KDUMP_NODES="$config_val"
|
||||
;;
|
||||
failure_action|default)
|
||||
case $config_val in
|
||||
shell)
|
||||
FAILURE_ACTION="kdump_emergency_shell"
|
||||
;;
|
||||
reboot)
|
||||
FAILURE_ACTION="systemctl reboot -f && exit"
|
||||
;;
|
||||
halt)
|
||||
FAILURE_ACTION="halt && exit"
|
||||
;;
|
||||
poweroff)
|
||||
FAILURE_ACTION="systemctl poweroff -f && exit"
|
||||
;;
|
||||
dump_to_rootfs)
|
||||
FAILURE_ACTION="dump_to_rootfs"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
final_action)
|
||||
case $config_val in
|
||||
reboot)
|
||||
FINAL_ACTION="systemctl reboot -f"
|
||||
;;
|
||||
halt)
|
||||
FINAL_ACTION="halt"
|
||||
;;
|
||||
poweroff)
|
||||
FINAL_ACTION="systemctl poweroff -f"
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
done <<< "$(read_strip_comments $KDUMP_CONF)"
|
||||
|
||||
if [ -z "$CORE_COLLECTOR" ]; then
|
||||
CORE_COLLECTOR="$DEFAULT_CORE_COLLECTOR"
|
||||
if is_ssh_dump_target || is_raw_dump_target; then
|
||||
CORE_COLLECTOR="$CORE_COLLECTOR -F"
|
||||
fi
|
||||
fi
|
||||
# Following steps are applied in order: strip trailing comment, strip trailing space,
|
||||
# strip leading space, match non-empty line, remove duplicated spaces between conf name and value
|
||||
[ -f "$KDUMP_CONFIG_FILE" ] && sed -n -e "s/#.*//;s/\s*$//;s/^\s*//;s/\(\S\+\)\s*\(.*\)/\1 \2/p" $KDUMP_CONFIG_FILE
|
||||
}
|
||||
|
||||
# store the kexec kernel log to a file.
|
||||
save_log()
|
||||
# Retrieves config value defined in kdump.conf
|
||||
# $1: config name, sed regexp compatible
|
||||
kdump_get_conf_val()
|
||||
{
|
||||
dmesg -T > $KDUMP_LOG_FILE
|
||||
|
||||
if command -v journalctl > /dev/null; then
|
||||
journalctl -ab >> $KDUMP_LOG_FILE
|
||||
fi
|
||||
chmod 600 $KDUMP_LOG_FILE
|
||||
# For lines matching "^\s*$1\s+", remove matched part (config name including space),
|
||||
# remove trailing comment, space, then store in hold space. Print out the hold buffer on last line.
|
||||
[ -f "$KDUMP_CONFIG_FILE" ] &&
|
||||