Upstream: fedora
Resolves: RHEL-32060
Conflict: Yes, there are several conflicts. 1) Upstream has moved
dracut-kdump.sh to kdump-utils/dracut/99kdumpbase/kdump.sh,
so the target files have changed. 2) Several patchsets ([1] [2])
have not been backported to rhel9, so some formatting conflicts
were encountered. No functional change was made while backporting
the patch.
[1]: https://github.com/rhkdump/kdump-utils/pull/18/commits
[2]: https://github.com/rhkdump/kdump-utils/pull/33/commits
commit 88525ebf5e43cc86aea66dc75ec83db58233883b
Author: Tao Liu <ltao@redhat.com>
Date: Thu Sep 5 15:49:07 2024 +1200
Introduce vmcore creation notification to kdump
Motivation
==========
People may forget to recheck that kdump still works, and as a result
no vmcore may be generated after a real system crash. This is
unexpected for kdump users.
It is highly recommended that people recheck kdump after any system
modification, such as:
a. after kernel patching or whole yum update, as it might break something
on which kdump is dependent, maybe due to introduction of any new bug etc.
b. after any change at hardware level, maybe storage, networking,
firmware upgrading etc.
c. after implementing any new application, like which involves 3rd party modules
etc.
Although these are beyond the scope of kdump, a simple vmcore creation
status notification is good to have for now.
Design
======
Kdump currently checks whether any related files/fs/drivers have been modified
before determining whether the initrd should be rebuilt on (re)start. A rebuild
is an indicator of such a modification, and means kdump needs to be rechecked.
A rebuild will clear the vmcore creation status recorded in $VMCORE_CREATION_STATUS.
The vmcore creation check happens at "kdumpctl (re)start/status", and reports
the creation success/fail status to users. A "success" status indicates that a
vmcore has previously been generated successfully in the current env, so it is
more likely that a vmcore will be generated later when a real crash happens;
a "fail" status indicates that previously no vmcore was generated, or that a
vmcore creation failed, in the current env. Users should check the 2nd kernel
log or the kexec-dmesg.log for the reason of the failure.
$VMCORE_CREATION_STATUS is used for recording the vmcore creation status of
the current env. The format will be like:
success 1718682002
Which means, there has been a vmcore generated successfully at this
timestamp for the current env.
Usage
=====
[root@localhost ~]# kdumpctl restart
kdump: kexec: unloaded kdump kernel
kdump: Stopping kdump: [OK]
kdump: kexec: loaded kdump kernel
kdump: Starting kdump: [OK]
kdump: Notice: No vmcore creation test performed!
[root@localhost ~]# kdumpctl test
[root@localhost ~]# kdumpctl status
kdump: Kdump is operational
kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024
[root@localhost ~]# kdumpctl restart
kdump: kexec: unloaded kdump kernel
kdump: Stopping kdump: [OK]
kdump: kexec: loaded kdump kernel
kdump: Starting kdump: [OK]
kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024
The notification for kdumpctl (re)start/status can be disabled by
setting VMCORE_CREATION_NOTIFICATION in /etc/sysconfig/kdump
Signed-off-by: Tao Liu <ltao@redhat.com>
Signed-off-by: Tao Liu <ltao@redhat.com>
494 lines
13 KiB
Bash
494 lines
13 KiB
Bash
#!/bin/bash --norc
# New mkdumprd
#
# Copyright 2011 Red Hat, Inc.
#
# Written by Cong Wang <amwang@redhat.com>
#
|
|
# Load the optional settings from /etc/sysconfig/kdump when the file exists.
if [[ -f /etc/sysconfig/kdump ]]; then
    . /etc/sysconfig/kdump
fi

# Fall back to the stock dracut directory unless the caller already set one.
[[ $dracutbasedir ]] || dracutbasedir=/usr/lib/dracut
. "$dracutbasedir/dracut-functions.sh"
. /lib/kdump/kdump-lib.sh
. /lib/kdump/kdump-logger.sh
export IN_KDUMP=1

#initiate the kdump logger
if ! dlog_init; then
    echo "failed to initiate the kdump logger."
    exit 1
fi
|
|
|
|
# Defaults; SSH_KEY_LOCATION and OVERRIDE_RESETTABLE may be overridden later
# from the kdump configuration.
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
SAVE_PATH=$(get_save_path)
OVERRIDE_RESETTABLE=0

# Accumulators filled in while the configuration is processed.
extra_modules=""
dracut_args=(--add kdumpbase --quiet --hostonly --hostonly-cmdline --hostonly-i18n --hostonly-mode strict --hostonly-nics '' -o "plymouth resume ifcfg earlykdump")

# Private scratch directory; "$MKDUMPRD_TMPDIR/target" is used as a temporary
# mount point for dump targets that have no mount point of their own.
MKDUMPRD_TMPDIR="$(mktemp -d -t mkdumprd.XXXXXX)"
[[ -d $MKDUMPRD_TMPDIR ]] || perror_exit "mkdumprd: mktemp -d -t mkdumprd.XXXXXX failed."
MKDUMPRD_TMPMNT="$MKDUMPRD_TMPDIR/target"

# On any exit: unmount the temporary mount point if it is still mounted,
# remove the scratch directory, and preserve the original exit status.
trap '
    ret=$?;
    is_mounted "$MKDUMPRD_TMPMNT" && umount -f -- "$MKDUMPRD_TMPMNT";
    [[ -d $MKDUMPRD_TMPDIR ]] && rm --one-file-system -rf -- "$MKDUMPRD_TMPDIR";
    exit $ret;
    ' EXIT

# clean up after ourselves no matter how we die.
trap 'exit 1;' SIGINT
|
|
|
|
# Append every given argument, unchanged, to the global dracut_args array.
# $@: arguments to append
add_dracut_arg()
{
    dracut_args+=("$@")
}
|
|
|
|
# Queue a "--mount <spec>" option pair for dracut.
# $1: mount specification, passed through as a single argument
add_dracut_mount()
{
    add_dracut_arg "--mount" "$1"
}
|
|
|
|
# Queue a "--sshkey <file>" option pair for dracut.
# $1: path of the ssh key file
add_dracut_sshkey()
{
    add_dracut_arg "--sshkey" "$1"
}
|
|
|
|
# Print a mount specification "<device> <mountpoint> <fstype> <options>"
# for a dump target.
# caller should ensure $1 is valid and mounted in 1st kernel
# $1: target (device, or remote export for nfs)
# $2: fs type      (optional, looked up from the target when empty)
# $3: mount options (optional, looked up from the target when empty,
#                    "defaults" when nothing is found)
# $4: mount point  (optional, looked up from the target when empty)
# Returns 1, printing nothing, when no persistent device name can be found
# for a non-nfs target.
to_mount()
{
    local _target=$1 _fstype=$2 _options=$3 _new_mntpoint=$4
    local _sed_cmd="" _pdev

    _new_mntpoint="${_new_mntpoint:-$(get_kdump_mntpoint_from_target "$_target")}"
    _fstype="${_fstype:-$(get_fs_type_from_target "$_target")}"
    _options="${_options:-$(get_mntopt_from_target "$_target")}"
    _options="${_options:-defaults}"

    if [[ $_fstype == "nfs"* ]]; then
        _pdev=$_target
        # strip the [mount]addr=, [mount]proto= and clientaddr= options
        _sed_cmd+='s/,\(mount\)\?addr=[^,]*//g;'
        _sed_cmd+='s/,\(mount\)\?proto=[^,]*//g;'
        _sed_cmd+='s/,clientaddr=[^,]*//;'
    else
        # for non-nfs _target converting to use udev persistent name
        _pdev="$(kdump_get_persistent_dev "$_target")"
        if [[ -z $_pdev ]]; then
            return 1
        fi
    fi

    # mount fs target as rw in 2nd kernel
    _sed_cmd+='s/\(^\|,\)ro\($\|,\)/\1rw\2/g;'
    # with 'noauto' in fstab nfs and non-root disk mount will fail in 2nd
    # kernel, filter it out here.
    _sed_cmd+='s/\(^\|,\)noauto\($\|,\)/\1/g;'
    # drop nofail or nobootwait
    _sed_cmd+='s/\(^\|,\)nofail\($\|,\)/\1/g;'
    _sed_cmd+='s/\(^\|,\)nobootwait\($\|,\)/\1/g;'

    # printf rather than echo so an option string is never taken as an echo flag
    _options=$(printf '%s\n' "$_options" | sed "$_sed_cmd")

    echo "$_pdev $_new_mntpoint $_fstype $_options"
}
|
|
|
|
#Function: get_ssh_size
#$1=dump target
#Prints the last line of the output of "df --output=avail $SAVE_PATH" run on
#the ssh target; calls perror_exit when the ssh command fails.
#called from while loop and shouldn't read from stdin, so we're using "ssh -n"
get_ssh_size()
{
    local _out
    local _opt=("-i" "$SSH_KEY_LOCATION" "-o" "BatchMode=yes" "-o" "StrictHostKeyChecking=yes")

    if ! _out=$(ssh -q -n "${_opt[@]}" "$1" "df" "--output=avail" "$SAVE_PATH"); then
        perror_exit "checking remote ssh server available size failed."
    fi

    # keep only the last line: the value below df's column header
    printf '%s' "$_out" | tail -n 1
}
|
|
|
|
#mkdir if save path does not exist on ssh dump target
#$1=ssh dump target
#caller should ensure write permission on $1:$SAVE_PATH
#called from while loop and shouldn't read from stdin, so we're using "ssh -n"
#Calls perror_exit when the directory cannot be created or is not writable;
#returns 0 otherwise.
mkdir_save_path_ssh()
{
    local _opt _dir

    _opt=(-i "$SSH_KEY_LOCATION" -o BatchMode=yes -o StrictHostKeyChecking=yes)
    ssh -qn "${_opt[@]}" "$1" mkdir -p "$SAVE_PATH" &> /dev/null ||
        perror_exit "mkdir failed on $1:$SAVE_PATH"

    # check whether user has write permission on $1:$SAVE_PATH
    # by creating, then removing, a temporary directory inside it
    _dir=$(ssh -qn "${_opt[@]}" "$1" mktemp -dqp "$SAVE_PATH" 2> /dev/null) ||
        perror_exit "Could not create temporary directory on $1:$SAVE_PATH. Make sure user has write permission on destination"
    ssh -qn "${_opt[@]}" "$1" rmdir "$_dir"

    return 0
}
|
|
|
|
#Function: get_fs_size
#$1=dump target
#Prints the last line of "df --output=avail" for $SAVE_PATH below the
#target's mount point.
get_fs_size()
{
    df --output=avail "$(get_mntpoint_from_target "$1")/$SAVE_PATH" | tail -n 1
}
|
|
|
|
#Function: get_raw_size
#$1=dump target
#Prints whatever "fdisk -s" reports as the size of the given device.
get_raw_size()
{
    fdisk -s "$1"
}
|
|
|
|
#Function: check_size
#$1: dump type string ('raw', 'fs', 'ssh')
#$2: dump target
#Warns when the space available on the dump target is smaller than MemTotal
#from /proc/meminfo. Any other dump type returns without checking. Calls
#perror_exit when the size lookup fails.
check_size()
{
    local avail memtotal

    memtotal=$(awk '/MemTotal/{print $2}' /proc/meminfo)
    # the "|| perror_exit" after esac fires when the selected lookup fails
    case "$1" in
    raw)
        avail=$(get_raw_size "$2")
        ;;
    ssh)
        avail=$(get_ssh_size "$2")
        ;;
    fs)
        avail=$(get_fs_size "$2")
        ;;
    *)
        return
        ;;
    esac || perror_exit "Check dump target size failed"

    if [[ $avail -lt $memtotal ]]; then
        dwarn "Warning: There might not be enough space to save a vmcore."
        dwarn "         The size of $2 should be greater than $memtotal kilo bytes."
    fi
}
|
|
|
|
# Abort via perror_exit unless the given dump path is an existing directory.
# $1: dump path
check_save_path_fs()
{
    local _path=$1

    if [[ ! -d $_path ]]; then
        perror_exit "Dump path $_path does not exist."
    fi
}
|
|
|
|
# Build a "Failed to mount ..." message and hand it to perror_exit.
# $1: dump target
# $2: mount point (may be empty, then it is left out of the message)
# $3: fs type; any nfs* type adds a hint about nfs-utils
mount_failure()
{
    local _target=$1
    local _mnt=$2
    local _fstype=$3
    local msg="Failed to mount $_target"

    if [[ -n $_mnt ]]; then
        msg="$msg on $_mnt"
    fi

    msg="$msg for kdump preflight check."

    # match nfs, nfs4, ... like the other nfs checks in this file
    if [[ $_fstype == "nfs"* ]]; then
        msg="$msg Please make sure nfs-utils has been installed."
    fi

    perror_exit "$msg"
}
|
|
|
|
# Preflight check for a dump target named explicitly in the configuration:
# verify the configured fs type, make sure the target can be mounted, that
# $SAVE_PATH exists inside it, and run the free-space check.
# $1: dump target
# $2: fs type given in the configuration
# Every failure ends in perror_exit (directly or through mount_failure).
check_user_configured_target()
{
    local _target=$1 _cfg_fs_type=$2 _mounted
    local _mnt _opt _fstype

    _mnt=$(get_mntpoint_from_target "$_target")
    _opt=$(get_mntopt_from_target "$_target")
    _fstype=$(get_fs_type_from_target "$_target")

    if [[ -n $_fstype ]]; then
        # In case of nfs4, nfs should be used instead, nfs* options is deprecated in kdump.conf
        [[ $_fstype == "nfs"* ]] && _fstype=nfs

        if [[ -n $_cfg_fs_type ]] && [[ $_fstype != "$_cfg_fs_type" ]]; then
            perror_exit "\"$_target\" have a wrong type config \"$_cfg_fs_type\", expected \"$_fstype\""
        fi
    else
        # nothing detected, trust the configured type
        _fstype="$_cfg_fs_type"
    fi

    # For noauto mount, mount it inplace with default value.
    # Else use the temporary target directory
    if [[ -n $_mnt ]]; then
        if ! is_mounted "$_mnt"; then
            # wrap in commas so "noauto" is found at any position in the
            # option list, including first
            if [[ ",$_opt," == *",noauto,"* ]]; then
                mount "$_mnt" || mount_failure "$_target" "$_mnt" "$_fstype"
                _mounted=$_mnt
            else
                perror_exit "Dump target \"$_target\" is neither mounted nor configured as \"noauto\""
            fi
        fi
    else
        _mnt=$MKDUMPRD_TMPMNT
        mkdir -p "$_mnt"
        mount "$_target" "$_mnt" -t "$_fstype" -o defaults || mount_failure "$_target" "" "$_fstype"
        _mounted=$_mnt
    fi

    # For user configured target, use $SAVE_PATH as the dump path within the target
    if [[ ! -d "$_mnt/$SAVE_PATH" ]]; then
        perror_exit "Dump path \"$SAVE_PATH\" does not exist in dump target \"$_target\""
    fi

    check_size fs "$_target"

    # Unmount it early, if function is interrupted and didn't reach here, the shell trap will clear it up anyway
    if [[ -n $_mounted ]]; then
        umount -f -- "$_mounted"
    fi
}
|
|
|
|
# Validate the core_collector setting.
# $1: core_collector config value
# Collectors other than makedumpfile are only warned about (for raw dump
# targets). For makedumpfile the parameters are passed to
# "makedumpfile --check-params", and "-F" is required for ssh and raw targets.
verify_core_collector()
{
    local _cmd="${1%% *}"
    # quote the pattern so the command name is stripped literally
    local _params="${1#"$_cmd"}"

    if [[ $_cmd != "makedumpfile" ]]; then
        if is_raw_dump_target; then
            dwarn "Warning: specifying a non-makedumpfile core collector, you will have to recover the vmcore manually."
        fi
        return
    fi

    if is_ssh_dump_target || is_raw_dump_target; then
        if ! strstr "$_params" "-F"; then
            perror_exit 'The specified dump target needs makedumpfile "-F" option.'
        fi
        _params="$_params vmcore"
    else
        _params="$_params vmcore dumpfile"
    fi

    # $_params is split on purpose: it holds several command line words
    # shellcheck disable=SC2086
    if ! $_cmd --check-params $_params; then
        perror_exit "makedumpfile parameter check failed."
    fi
}
|
|
|
|
# Turn a dump target into a mount specification with to_mount and queue it
# as a dracut "--mount" option. Exits the script with status 1 when to_mount
# fails.
# $@: passed through to to_mount (target, fs type, options, mount point)
add_mount()
{
    local _mnt

    _mnt=$(to_mount "$@") || exit 1

    add_dracut_mount "$_mnt"
}
|
|
|
|
#handle the case user does not specify the dump target explicitly
#Does nothing when a dump target is configured. Otherwise resolves the
#target holding $SAVE_PATH, rewrites the global SAVE_PATH relative to that
#target's mount point, queues the mount and runs the free-space check.
handle_default_dump_target()
{
    local _target
    local _mntpoint
    local _save_path

    is_user_configured_dump_target && return

    check_save_path_fs "$SAVE_PATH"

    _save_path=$(get_bind_mount_source "$SAVE_PATH")
    _target=$(get_target_from_path "$_save_path")
    _mntpoint=$(get_mntpoint_from_target "$_target")

    # strip the mount point prefix from the save path
    SAVE_PATH=${_save_path##"$_mntpoint"}
    add_mount "$_target"
    check_size fs "$_target"
}
|
|
|
|
# Run check_block_and_slaves with the given function on every kdump target
# that is a block device.
# $1: function name
# Returns 1 as soon as check_block_and_slaves succeeds for one target,
# 0 when it succeeded for none.
for_each_block_target()
{
    local dev majmin

    # word splitting of the target list is intended here
    for dev in $(get_kdump_targets); do
        [[ -b $dev ]] || continue
        majmin=$(get_maj_min "$dev")
        check_block_and_slaves "$1" "$majmin" && return 1
    done

    return 0
}
|
|
|
|
#judge if a specific device with $1 is unresettable
#$1: device number as used under /sys/dev/block/
#return 0 (true) if the device is unresettable and OVERRIDE_RESETTABLE is 0,
#return 1 otherwise, including when no "resettable" attribute file exists.
is_unresettable()
{
    local path device resettable=1

    # derive the sysfs directory carrying the "resettable" attribute from the
    # udev device path
    path="/sys/$(udevadm info --query=all --path="/sys/dev/block/$1" | awk '/^P:/ {print $2}' | sed -e 's/\(cciss[0-9]\+\/\).*/\1/g' -e 's/\/block\/.*$//')/resettable"
    if [[ -f $path ]]; then
        resettable="$(< "$path")"
        if [[ $resettable -eq 0 ]] && [[ $OVERRIDE_RESETTABLE -eq 0 ]]; then
            device=$(udevadm info --query=all --path="/sys/dev/block/$1" | awk -F= '/DEVNAME/{print $2}')
            derror "Error: Can not save vmcore because device $device is unresettable"
            return 0
        fi
    fi

    return 1
}
|
|
|
|
#check if machine is resettable.
#return true if resettable
#Reads "override_resettable" from the kdump configuration into the global
#OVERRIDE_RESETTABLE first; calls perror_exit unless the value is 0 or 1.
check_resettable()
{
    local _target _override_resettable

    _override_resettable=$(kdump_get_conf_val override_resettable)
    OVERRIDE_RESETTABLE=${_override_resettable:-$OVERRIDE_RESETTABLE}
    if [[ $OVERRIDE_RESETTABLE != "0" ]] && [[ $OVERRIDE_RESETTABLE != "1" ]]; then
        perror_exit "override_resettable value '$OVERRIDE_RESETTABLE' is invalid"
    fi

    # for_each_block_target succeeds when no target was found unresettable
    for_each_block_target is_unresettable && return

    return 1
}
|
|
|
|
# Look for an encrypted device among the kdump targets.
# Logs an error and returns 1 for the first target for which
# get_luks_crypt_dev prints something; returns 0 when there is none.
check_crypt()
{
    local _dev

    # word splitting of the target list is intended here
    for _dev in $(get_kdump_targets); do
        if [[ -n $(get_luks_crypt_dev "$(get_maj_min "$_dev")") ]]; then
            # return 1 regardless of whether logging itself succeeded
            derror "Device $_dev is encrypted."
            return 1
        fi
    done

    return 0
}
|
|
|
|
# An unresettable dump device is fatal.
if ! check_resettable; then
    exit 1
fi

# An encrypted dump device only produces a warning.
if ! check_crypt; then
    dwarn "Warning: Encrypted device is in dump path, which is not recommended, see kexec-kdump-howto.txt for more details."
fi
|
|
|
|
# firstly get right SSH_KEY_LOCATION
# The default is kept unless the configured "sshkey" value names an existing file.
keyfile=$(kdump_get_conf_val sshkey)
if [[ -f $keyfile ]]; then
    # canonicalize the path
    SSH_KEY_LOCATION=$(/usr/bin/readlink -m "$keyfile")
fi
|
|
|
|
# Process every "<option> <value>" line printed by kdump_read_conf and turn
# it into preflight checks and/or dracut arguments.
while read -r config_opt config_val; do
    case "$config_opt" in
    extra_modules)
        extra_modules="$extra_modules $config_val"
        ;;
    ext[234] | xfs | btrfs | minix | nfs | virtiofs)
        check_user_configured_target "$config_val" "$config_opt"
        add_mount "$config_val" "$config_opt"
        ;;
    raw)
        # check that one block can be read from the raw disk
        dd if="$config_val" count=1 of=/dev/null > /dev/null 2>&1 || {
            perror_exit "Bad raw disk $config_val"
        }
        _praw=$(persistent_policy="by-id" kdump_get_persistent_dev "$config_val")
        if [[ -z $_praw ]]; then
            exit 1
        fi
        add_dracut_arg "--device" "$_praw"
        check_size raw "$config_val"
        ;;
    ssh)
        # an ssh target must contain "@"
        if strstr "$config_val" "@"; then
            mkdir_save_path_ssh "$config_val"
            check_size ssh "$config_val"
            add_dracut_sshkey "$SSH_KEY_LOCATION"
        else
            perror_exit "Bad ssh dump target $config_val"
        fi
        ;;
    core_collector)
        verify_core_collector "$config_val"
        ;;
    dracut_args)

        # When users specify nfs dumping via dracut_args, kexec-tools won't
        # mount nfs fs beforehand thus nfsv4-related drivers won't be installed
        # because we call dracut with --hostonly-mode strict. So manually install
        # nfsv4-related drivers.
        if [[ $(get_dracut_args_fstype "$config_val") == nfs* ]]; then
            add_dracut_arg "--add-drivers" nfs_layout_nfsv41_files
        fi

        # let xargs split the value into words, one per line, and add each
        # word as its own dracut argument
        while read -r dracut_arg; do
            add_dracut_arg "$dracut_arg"
        done <<< "$(echo "$config_val" | xargs -n 1 echo)"
        ;;
    *) ;;

    esac
done <<< "$(kdump_read_conf)"
|
|
|
|
handle_default_dump_target

# Pick zstd compression unless a compression option is already present in
# the configured dracut args.
if ! have_compression_in_dracut_args; then
    if is_squash_available && dracut_have_option "--squash-compressor"; then
        add_dracut_arg "--squash-compressor" "zstd"
    elif is_zstd_command_available; then
        add_dracut_arg "--compress" "zstd"
    fi
fi

# Hand all collected extra_modules to dracut as one --add-drivers value.
if [[ -n $extra_modules ]]; then
    add_dracut_arg "--add-drivers" "$extra_modules"
fi
|
|
|
|
# TODO: The below check is not needed anymore with the introduction of
#       'zz-fadumpinit' module, that isolates fadump's capture kernel initrd,
#       but still sysroot.mount unit gets generated based on 'root=' kernel
#       parameter available in fadump case. So, find a way to fix that first
#       before removing this check.
if ! is_fadump_capable; then
    # The 2nd rootfs mount stays behind the normal dump target mount,
    # so it doesn't affect the logic of check_dump_fs_modified().
    is_dump_to_rootfs && add_mount "$(to_dev_name "$(get_root_fs_device)")"

    add_dracut_arg "--no-hostonly-default-device"

    # in FIPS mode also add the device backing /boot
    if fips-mode-setup --is-enabled 2> /dev/null; then
        add_dracut_arg --add-device "$(findmnt -n -o SOURCE --target /boot)"
    fi
fi

# This is RHEL-only to work around nvme problem, then real fix should go to dracut
if [[ -d /sys/module/nvme ]]; then
    add_dracut_arg "--add-drivers" "nvme"
fi
|
|
|
|
# Make sure the target holding the directory of $VMCORE_CREATION_STATUS gets
# mounted as well.
status_target=$(get_target_from_path "$(dirname "$VMCORE_CREATION_STATUS")")

if [[ $(get_root_fs_device) != "$status_target" ]]; then
    # not the root fs device: mount it below /vmcorestatus, squeezing
    # repeated slashes in the resulting path
    new_mntpoint=$(echo "/vmcorestatus/$(get_mntpoint_from_target "$status_target")" |
        tr -s "/")
    add_mount "$status_target" "" "" "$new_mntpoint"
elif ! is_fadump_capable &&
    [[ ${dracut_args[*]} != *"$(kdump_get_persistent_dev "$status_target")"* ]]; then
    # root fs device whose persistent name is not among the dracut args yet
    add_mount "$status_target"
fi
|
|
|
|
# Run dracut with the collected arguments followed by the script's own
# command line arguments.
dracut "${dracut_args[@]}" "$@"

# remember dracut's status so that sync does not overwrite it
_rc=$?
sync
exit $_rc
|