c5aa460992
Upstream: fedora Resolves: RHEL-32060 Conflict: Yes, there are several conflicts. 1) Upstream have moved dracut-kdump.sh into kdump-utils/dracut/99kdumpbase/kdump.sh, so the targeting files are changed. 2) There are several patchsets([1] [2]) which not backported to rhel9, so some formating conflicts encountered. But there is no functional change been made for the patch backporting. [1]: https://github.com/rhkdump/kdump-utils/pull/18/commits [2]: https://github.com/rhkdump/kdump-utils/pull/33/commits commit 88525ebf5e43cc86aea66dc75ec83db58233883b Author: Tao Liu <ltao@redhat.com> Date: Thu Sep 5 15:49:07 2024 +1200 Introduce vmcore creation notification to kdump Motivation ========== People may forget to recheck to ensure kdump works, which as a result, a possibility of no vmcores generated after a real system crash. It is unexpected for kdump. It is highly recommended people to recheck kdump after any system modification, such as: a. after kernel patching or whole yum update, as it might break something on which kdump is dependent, maybe due to introduction of any new bug etc. b. after any change at hardware level, maybe storage, networking, firmware upgrading etc. c. after implementing any new application, like which involves 3rd party modules etc. Though these exceed the range of kdump, however a simple vmcore creation status notification is good to have for now. Design ====== Kdump currently will check any relating files/fs/drivers modified before determine if initrd should rebuild when (re)start. A rebuild is an indicator of such modification, and kdump need to be rechecked. This will clear the vmcore creation status specified in $VMCORE_CREATION_STATUS. Vmcore creation check will happen at "kdumpctl (re)start/status", and will report the creation success/fail status to users. A "success" status indicates previously there has been a vmcore successfully generated based on the current env, so it is more likely a vmcore will be generated later when real crash happens; A "fail" status indicates previously there was no vmcore generated, or has been a vmcore creation failed based on current env. User should check the 2nd kernel log or the kexec-dmesg.log for the failing reason. $VMCORE_CREATION_STATUS is used for recording the vmcore creation status of the current env. The format will be like: success 1718682002 Which means, there has been a vmcore generated successfully at this timestamp for the current env. Usage ===== [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: No vmcore creation test performed! [root@localhost ~]# kdumpctl test [root@localhost ~]# kdumpctl status kdump: Kdump is operational kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 The notification for kdumpctl (re)start/status can be disabled by setting VMCORE_CREATION_NOTIFICATION in /etc/sysconfig/kdump Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
494 lines
13 KiB
Bash
494 lines
13 KiB
Bash
#!/bin/bash --norc
|
|
# New mkdumprd
|
|
#
|
|
# Copyright 2011 Red Hat, Inc.
|
|
#
|
|
# Written by Cong Wang <amwang@redhat.com>
|
|
#
|
|
|
|
if [[ -f /etc/sysconfig/kdump ]]; then
|
|
. /etc/sysconfig/kdump
|
|
fi
|
|
|
|
[[ $dracutbasedir ]] || dracutbasedir=/usr/lib/dracut
|
|
. $dracutbasedir/dracut-functions.sh
|
|
. /lib/kdump/kdump-lib.sh
|
|
. /lib/kdump/kdump-logger.sh
|
|
export IN_KDUMP=1
|
|
|
|
#initiate the kdump logger
|
|
if ! dlog_init; then
|
|
echo "failed to initiate the kdump logger."
|
|
exit 1
|
|
fi
|
|
|
|
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
|
|
SAVE_PATH=$(get_save_path)
|
|
OVERRIDE_RESETTABLE=0
|
|
|
|
extra_modules=""
|
|
dracut_args=(--add kdumpbase --quiet --hostonly --hostonly-cmdline --hostonly-i18n --hostonly-mode strict --hostonly-nics '' -o "plymouth resume ifcfg earlykdump")
|
|
|
|
MKDUMPRD_TMPDIR="$(mktemp -d -t mkdumprd.XXXXXX)"
|
|
[ -d "$MKDUMPRD_TMPDIR" ] || perror_exit "dracut: mktemp -p -d -t dracut.XXXXXX failed."
|
|
MKDUMPRD_TMPMNT="$MKDUMPRD_TMPDIR/target"
|
|
|
|
trap '
|
|
ret=$?;
|
|
is_mounted $MKDUMPRD_TMPMNT && umount -f $MKDUMPRD_TMPMNT;
|
|
[[ -d $MKDUMPRD_TMPDIR ]] && rm --one-file-system -rf -- "$MKDUMPRD_TMPDIR";
|
|
exit $ret;
|
|
' EXIT
|
|
|
|
# clean up after ourselves no matter how we die.
|
|
trap 'exit 1;' SIGINT
|
|
|
|
add_dracut_arg()
|
|
{
|
|
dracut_args+=("$@")
|
|
}
|
|
|
|
add_dracut_mount()
|
|
{
|
|
add_dracut_arg "--mount" "$1"
|
|
}
|
|
|
|
add_dracut_sshkey()
|
|
{
|
|
add_dracut_arg "--sshkey" "$1"
|
|
}
|
|
|
|
# caller should ensure $1 is valid and mounted in 1st kernel
|
|
to_mount()
|
|
{
|
|
local _target=$1 _fstype=$2 _options=$3 _new_mntpoint=$4
|
|
local _sed_cmd _pdev
|
|
|
|
_new_mntpoint="${_new_mntpoint:-$(get_kdump_mntpoint_from_target "$_target")}"
|
|
_fstype="${_fstype:-$(get_fs_type_from_target "$_target")}"
|
|
_options="${_options:-$(get_mntopt_from_target "$_target")}"
|
|
_options="${_options:-defaults}"
|
|
|
|
if [[ $_fstype == "nfs"* ]]; then
|
|
_pdev=$_target
|
|
_sed_cmd+='s/,\(mount\)\?addr=[^,]*//g;'
|
|
_sed_cmd+='s/,\(mount\)\?proto=[^,]*//g;'
|
|
_sed_cmd+='s/,clientaddr=[^,]*//;'
|
|
else
|
|
# for non-nfs _target converting to use udev persistent name
|
|
_pdev="$(kdump_get_persistent_dev "$_target")"
|
|
if [[ -z $_pdev ]]; then
|
|
return 1
|
|
fi
|
|
fi
|
|
|
|
# mount fs target as rw in 2nd kernel
|
|
_sed_cmd+='s/\(^\|,\)ro\($\|,\)/\1rw\2/g;'
|
|
# with 'noauto' in fstab nfs and non-root disk mount will fail in 2nd
|
|
# kernel, filter it out here.
|
|
_sed_cmd+='s/\(^\|,\)noauto\($\|,\)/\1/g;'
|
|
# drop nofail or nobootwait
|
|
_sed_cmd+='s/\(^\|,\)nofail\($\|,\)/\1/g;'
|
|
_sed_cmd+='s/\(^\|,\)nobootwait\($\|,\)/\1/g;'
|
|
|
|
_options=$(echo "$_options" | sed "$_sed_cmd")
|
|
|
|
echo "$_pdev $_new_mntpoint $_fstype $_options"
|
|
}
|
|
|
|
#Function: get_ssh_size
|
|
#$1=dump target
|
|
#called from while loop and shouldn't read from stdin, so we're using "ssh -n"
|
|
get_ssh_size()
|
|
{
|
|
local _out
|
|
local _opt=("-i" "$SSH_KEY_LOCATION" "-o" "BatchMode=yes" "-o" "StrictHostKeyChecking=yes")
|
|
|
|
if ! _out=$(ssh -q -n "${_opt[@]}" "$1" "df" "--output=avail" "$SAVE_PATH"); then
|
|
perror_exit "checking remote ssh server available size failed."
|
|
fi
|
|
|
|
echo -n "$_out" | tail -1
|
|
}
|
|
|
|
#mkdir if save path does not exist on ssh dump target
|
|
#$1=ssh dump target
|
|
#caller should ensure write permission on $1:$SAVE_PATH
|
|
#called from while loop and shouldn't read from stdin, so we're using "ssh -n"
|
|
mkdir_save_path_ssh()
|
|
{
|
|
local _opt _dir
|
|
_opt=(-i "$SSH_KEY_LOCATION" -o BatchMode=yes -o StrictHostKeyChecking=yes)
|
|
ssh -qn "${_opt[@]}" "$1" mkdir -p "$SAVE_PATH" &> /dev/null ||
|
|
perror_exit "mkdir failed on $1:$SAVE_PATH"
|
|
|
|
# check whether user has write permission on $1:$SAVE_PATH
|
|
_dir=$(ssh -qn "${_opt[@]}" "$1" mktemp -dqp "$SAVE_PATH" 2> /dev/null) ||
|
|
perror_exit "Could not create temporary directory on $1:$SAVE_PATH. Make sure user has write permission on destination"
|
|
ssh -qn "${_opt[@]}" "$1" rmdir "$_dir"
|
|
|
|
return 0
|
|
}
|
|
|
|
#Function: get_fs_size
|
|
#$1=dump target
|
|
get_fs_size()
|
|
{
|
|
df --output=avail "$(get_mntpoint_from_target "$1")/$SAVE_PATH" | tail -1
|
|
}
|
|
|
|
#Function: get_raw_size
|
|
#$1=dump target
|
|
get_raw_size()
|
|
{
|
|
fdisk -s "$1"
|
|
}
|
|
|
|
#Function: check_size
|
|
#$1: dump type string ('raw', 'fs', 'ssh')
|
|
#$2: dump target
|
|
check_size()
|
|
{
|
|
local avail memtotal
|
|
|
|
memtotal=$(awk '/MemTotal/{print $2}' /proc/meminfo)
|
|
case "$1" in
|
|
raw)
|
|
avail=$(get_raw_size "$2")
|
|
;;
|
|
ssh)
|
|
avail=$(get_ssh_size "$2")
|
|
;;
|
|
fs)
|
|
avail=$(get_fs_size "$2")
|
|
;;
|
|
*)
|
|
return
|
|
;;
|
|
esac || perror_exit "Check dump target size failed"
|
|
|
|
if [[ $avail -lt $memtotal ]]; then
|
|
dwarn "Warning: There might not be enough space to save a vmcore."
|
|
dwarn " The size of $2 should be greater than $memtotal kilo bytes."
|
|
fi
|
|
}
|
|
|
|
check_save_path_fs()
|
|
{
|
|
local _path=$1
|
|
|
|
if [[ ! -d $_path ]]; then
|
|
perror_exit "Dump path $_path does not exist."
|
|
fi
|
|
}
|
|
|
|
mount_failure()
|
|
{
|
|
local _target=$1
|
|
local _mnt=$2
|
|
local _fstype=$3
|
|
local msg="Failed to mount $_target"
|
|
|
|
if [[ -n $_mnt ]]; then
|
|
msg="$msg on $_mnt"
|
|
fi
|
|
|
|
msg="$msg for kdump preflight check."
|
|
|
|
if [[ $_fstype == "nfs" ]]; then
|
|
msg="$msg Please make sure nfs-utils has been installed."
|
|
fi
|
|
|
|
perror_exit "$msg"
|
|
}
|
|
|
|
check_user_configured_target()
|
|
{
|
|
local _target=$1 _cfg_fs_type=$2 _mounted
|
|
local _mnt _opt _fstype
|
|
|
|
_mnt=$(get_mntpoint_from_target "$_target")
|
|
_opt=$(get_mntopt_from_target "$_target")
|
|
_fstype=$(get_fs_type_from_target "$_target")
|
|
|
|
if [[ -n $_fstype ]]; then
|
|
# In case of nfs4, nfs should be used instead, nfs* options is deprecated in kdump.conf
|
|
[[ $_fstype == "nfs"* ]] && _fstype=nfs
|
|
|
|
if [[ -n $_cfg_fs_type ]] && [[ $_fstype != "$_cfg_fs_type" ]]; then
|
|
perror_exit "\"$_target\" have a wrong type config \"$_cfg_fs_type\", expected \"$_fstype\""
|
|
fi
|
|
else
|
|
_fstype="$_cfg_fs_type"
|
|
_fstype="$_cfg_fs_type"
|
|
fi
|
|
|
|
# For noauto mount, mount it inplace with default value.
|
|
# Else use the temporary target directory
|
|
if [[ -n $_mnt ]]; then
|
|
if ! is_mounted "$_mnt"; then
|
|
if [[ $_opt == *",noauto"* ]]; then
|
|
mount "$_mnt" || mount_failure "$_target" "$_mnt" "$_fstype"
|
|
_mounted=$_mnt
|
|
else
|
|
perror_exit "Dump target \"$_target\" is neither mounted nor configured as \"noauto\""
|
|
fi
|
|
fi
|
|
else
|
|
_mnt=$MKDUMPRD_TMPMNT
|
|
mkdir -p "$_mnt"
|
|
mount "$_target" "$_mnt" -t "$_fstype" -o defaults || mount_failure "$_target" "" "$_fstype"
|
|
_mounted=$_mnt
|
|
fi
|
|
|
|
# For user configured target, use $SAVE_PATH as the dump path within the target
|
|
if [[ ! -d "$_mnt/$SAVE_PATH" ]]; then
|
|
perror_exit "Dump path \"$SAVE_PATH\" does not exist in dump target \"$_target\""
|
|
fi
|
|
|
|
check_size fs "$_target"
|
|
|
|
# Unmount it early, if function is interrupted and didn't reach here, the shell trap will clear it up anyway
|
|
if [[ -n $_mounted ]]; then
|
|
umount -f -- "$_mounted"
|
|
fi
|
|
}
|
|
|
|
# $1: core_collector config value
|
|
verify_core_collector()
|
|
{
|
|
local _cmd="${1%% *}"
|
|
local _params="${1#${_cmd}}"
|
|
|
|
if [[ $_cmd != "makedumpfile" ]]; then
|
|
if is_raw_dump_target; then
|
|
dwarn "Warning: specifying a non-makedumpfile core collector, you will have to recover the vmcore manually."
|
|
fi
|
|
return
|
|
fi
|
|
|
|
if is_ssh_dump_target || is_raw_dump_target; then
|
|
if ! strstr "$_params" "-F"; then
|
|
perror_exit 'The specified dump target needs makedumpfile "-F" option.'
|
|
fi
|
|
_params="$_params vmcore"
|
|
else
|
|
_params="$_params vmcore dumpfile"
|
|
fi
|
|
|
|
# shellcheck disable=SC2086
|
|
if ! $_cmd --check-params $_params; then
|
|
perror_exit "makedumpfile parameter check failed."
|
|
fi
|
|
}
|
|
|
|
add_mount()
|
|
{
|
|
local _mnt
|
|
|
|
_mnt=$(to_mount "$@") || exit 1
|
|
|
|
add_dracut_mount "$_mnt"
|
|
}
|
|
|
|
#handle the case user does not specify the dump target explicitly
|
|
handle_default_dump_target()
|
|
{
|
|
local _target
|
|
local _mntpoint
|
|
|
|
is_user_configured_dump_target && return
|
|
|
|
check_save_path_fs "$SAVE_PATH"
|
|
|
|
_save_path=$(get_bind_mount_source "$SAVE_PATH")
|
|
_target=$(get_target_from_path "$_save_path")
|
|
_mntpoint=$(get_mntpoint_from_target "$_target")
|
|
|
|
SAVE_PATH=${_save_path##"$_mntpoint"}
|
|
add_mount "$_target"
|
|
check_size fs "$_target"
|
|
}
|
|
|
|
# $1: function name
|
|
for_each_block_target()
|
|
{
|
|
local dev majmin
|
|
|
|
for dev in $(get_kdump_targets); do
|
|
[[ -b $dev ]] || continue
|
|
majmin=$(get_maj_min "$dev")
|
|
check_block_and_slaves "$1" "$majmin" && return 1
|
|
done
|
|
|
|
return 0
|
|
}
|
|
|
|
#judge if a specific device with $1 is unresettable
|
|
#return false if unresettable.
|
|
is_unresettable()
|
|
{
|
|
local path device resettable=1
|
|
|
|
path="/sys/$(udevadm info --query=all --path="/sys/dev/block/$1" | awk '/^P:/ {print $2}' | sed -e 's/\(cciss[0-9]\+\/\).*/\1/g' -e 's/\/block\/.*$//')/resettable"
|
|
if [[ -f $path ]]; then
|
|
resettable="$(< "$path")"
|
|
[[ $resettable -eq 0 ]] && [[ $OVERRIDE_RESETTABLE -eq 0 ]] && {
|
|
device=$(udevadm info --query=all --path="/sys/dev/block/$1" | awk -F= '/DEVNAME/{print $2}')
|
|
derror "Error: Can not save vmcore because device $device is unresettable"
|
|
return 0
|
|
}
|
|
fi
|
|
|
|
return 1
|
|
}
|
|
|
|
#check if machine is resettable.
|
|
#return true if resettable
|
|
check_resettable()
|
|
{
|
|
local _target _override_resettable
|
|
|
|
_override_resettable=$(kdump_get_conf_val override_resettable)
|
|
OVERRIDE_RESETTABLE=${_override_resettable:-$OVERRIDE_RESETTABLE}
|
|
if [ "$OVERRIDE_RESETTABLE" != "0" ] && [ "$OVERRIDE_RESETTABLE" != "1" ]; then
|
|
perror_exit "override_resettable value '$OVERRIDE_RESETTABLE' is invalid"
|
|
fi
|
|
|
|
for_each_block_target is_unresettable && return
|
|
|
|
return 1
|
|
}
|
|
|
|
check_crypt()
|
|
{
|
|
local _dev
|
|
|
|
for _dev in $(get_kdump_targets); do
|
|
if [[ -n $(get_luks_crypt_dev "$(get_maj_min "$_dev")") ]]; then
|
|
derror "Device $_dev is encrypted." && return 1
|
|
fi
|
|
done
|
|
}
|
|
|
|
if ! check_resettable; then
|
|
exit 1
|
|
fi
|
|
|
|
if ! check_crypt; then
|
|
dwarn "Warning: Encrypted device is in dump path, which is not recommended, see kexec-kdump-howto.txt for more details."
|
|
fi
|
|
|
|
# firstly get right SSH_KEY_LOCATION
|
|
keyfile=$(kdump_get_conf_val sshkey)
|
|
if [[ -f $keyfile ]]; then
|
|
# canonicalize the path
|
|
SSH_KEY_LOCATION=$(/usr/bin/readlink -m "$keyfile")
|
|
fi
|
|
|
|
while read -r config_opt config_val; do
|
|
# remove inline comments after the end of a directive.
|
|
case "$config_opt" in
|
|
extra_modules)
|
|
extra_modules="$extra_modules $config_val"
|
|
;;
|
|
ext[234] | xfs | btrfs | minix | nfs | virtiofs)
|
|
check_user_configured_target "$config_val" "$config_opt"
|
|
add_mount "$config_val" "$config_opt"
|
|
;;
|
|
raw)
|
|
# checking raw disk writable
|
|
dd if="$config_val" count=1 of=/dev/null > /dev/null 2>&1 || {
|
|
perror_exit "Bad raw disk $config_val"
|
|
}
|
|
_praw=$(persistent_policy="by-id" kdump_get_persistent_dev "$config_val")
|
|
if [[ -z $_praw ]]; then
|
|
exit 1
|
|
fi
|
|
add_dracut_arg "--device" "$_praw"
|
|
check_size raw "$config_val"
|
|
;;
|
|
ssh)
|
|
if strstr "$config_val" "@"; then
|
|
mkdir_save_path_ssh "$config_val"
|
|
check_size ssh "$config_val"
|
|
add_dracut_sshkey "$SSH_KEY_LOCATION"
|
|
else
|
|
perror_exit "Bad ssh dump target $config_val"
|
|
fi
|
|
;;
|
|
core_collector)
|
|
verify_core_collector "$config_val"
|
|
;;
|
|
dracut_args)
|
|
|
|
# When users specify nfs dumping via dracut_args, kexec-tools won't
|
|
# mount nfs fs beforehand thus nfsv4-related drivers won't be installed
|
|
# because we call dracut with --hostonly-mode strict. So manually install
|
|
# nfsv4-related drivers.
|
|
if [[ $(get_dracut_args_fstype "$config_val") == nfs* ]]; then
|
|
add_dracut_arg "--add-drivers" nfs_layout_nfsv41_files
|
|
fi
|
|
|
|
while read -r dracut_arg; do
|
|
add_dracut_arg "$dracut_arg"
|
|
done <<< "$(echo "$config_val" | xargs -n 1 echo)"
|
|
;;
|
|
*) ;;
|
|
|
|
esac
|
|
done <<< "$(kdump_read_conf)"
|
|
|
|
handle_default_dump_target
|
|
|
|
if ! have_compression_in_dracut_args; then
|
|
if is_squash_available && dracut_have_option "--squash-compressor"; then
|
|
add_dracut_arg "--squash-compressor" "zstd"
|
|
elif is_zstd_command_available; then
|
|
add_dracut_arg "--compress" "zstd"
|
|
fi
|
|
fi
|
|
|
|
if [[ -n $extra_modules ]]; then
|
|
add_dracut_arg "--add-drivers" "$extra_modules"
|
|
fi
|
|
|
|
# TODO: The below check is not needed anymore with the introduction of
|
|
# 'zz-fadumpinit' module, that isolates fadump's capture kernel initrd,
|
|
# but still sysroot.mount unit gets generated based on 'root=' kernel
|
|
# parameter available in fadump case. So, find a way to fix that first
|
|
# before removing this check.
|
|
if ! is_fadump_capable; then
|
|
# The 2nd rootfs mount stays behind the normal dump target mount,
|
|
# so it doesn't affect the logic of check_dump_fs_modified().
|
|
is_dump_to_rootfs && add_mount "$(to_dev_name "$(get_root_fs_device)")"
|
|
|
|
add_dracut_arg "--no-hostonly-default-device"
|
|
|
|
if fips-mode-setup --is-enabled 2> /dev/null; then
|
|
add_dracut_arg --add-device "$(findmnt -n -o SOURCE --target /boot)"
|
|
fi
|
|
fi
|
|
|
|
# This is RHEL-only to work around nvme problem, then real fix should go to dracut
|
|
if [[ -d /sys/module/nvme ]]; then
|
|
add_dracut_arg "--add-drivers" "nvme"
|
|
fi
|
|
|
|
status_target=$(get_target_from_path $(dirname "$VMCORE_CREATION_STATUS"))
|
|
|
|
if [[ $(get_root_fs_device) != "$status_target" ]]; then
|
|
new_mntpoint=$(echo /vmcorestatus/$(get_mntpoint_from_target "$status_target") \
|
|
| tr -s "/")
|
|
add_mount "$status_target" "" "" "$new_mntpoint"
|
|
elif ! is_fadump_capable && \
|
|
! [[ ${dracut_args[@]} == *"$(kdump_get_persistent_dev $status_target)"* ]]; then
|
|
add_mount "$status_target"
|
|
fi
|
|
|
|
dracut "${dracut_args[@]}" "$@"
|
|
|
|
_rc=$?
|
|
sync
|
|
exit $_rc
|