kexec-tools/kdump-lib.sh
Lichen Liu 4a3b7d1337
kdump-lib-initramfs: Improve mount point retrieval logic
Resolves: RHEL-35885
Upstream: https://github.com/rhkdump/kdump-utils/
Conflict: none

commit 27a9d1dc85283239ee0b0b29ce5a00597d3f965f
Author: Lichen Liu <lichliu@redhat.com>
Date:   Sun Sep 29 16:02:04 2024 +0800

    kdump-lib-initramfs: Improve mount point retrieval logic

    We use findmnt to find the real mount point by mount source, however,
    when there is a bind mount target, findmnt will give more than one
    results and what we actually want is the one without fsroot.

    Will fallback to previous method if the $_mntpoint is empty.

    Signed-off-by: Lichen Liu <lichliu@redhat.com>

Signed-off-by: Lichen Liu <lichliu@redhat.com>
2024-12-05 10:23:16 +08:00

1249 lines
31 KiB
Bash
Executable File

#!/bin/bash
#
# Kdump common variables and functions
#
. /usr/lib/kdump/kdump-lib-initramfs.sh
FADUMP_ENABLED_SYS_NODE="/sys/kernel/fadump/enabled"
FADUMP_REGISTER_SYS_NODE="/sys/kernel/fadump/registered"
is_uki()
{
local img
img="$1"
[[ -f "$img" ]] || return
[[ "$(objdump -a "$img" 2> /dev/null)" =~ pei-(x86-64|aarch64-little) ]] || return
objdump -h -j .linux "$img" &> /dev/null
}
is_fadump_capable()
{
# Check if firmware-assisted dump is enabled
# if no, fallback to kdump check
if [[ -f $FADUMP_ENABLED_SYS_NODE ]]; then
rc=$(< $FADUMP_ENABLED_SYS_NODE)
[[ $rc -eq 1 ]] && return 0
fi
return 1
}
is_sme_or_sev_active()
{
journalctl -q --dmesg --grep "^Memory Encryption Features active: AMD (SME|SEV)$" >/dev/null 2>&1
}
is_squash_available()
{
local _version kmodule
_version=$(_get_kdump_kernel_version)
for kmodule in squashfs overlay loop; do
modprobe -S "$_version" --dry-run $kmodule &> /dev/null || return 1
done
}
is_zstd_command_available()
{
[[ -x "$(command -v zstd)" ]]
}
dracut_have_option()
{
local _option=$1
! dracut "$_option" 2>&1 | grep -q "unrecognized option"
}
perror_exit()
{
derror "$@"
exit 1
}
# Check if fence kdump is configured in Pacemaker cluster
is_pcs_fence_kdump()
{
# no pcs or fence_kdump_send executables installed?
type -P pcs > /dev/null || return 1
[[ -x $FENCE_KDUMP_SEND ]] || return 1
# fence kdump not configured?
(pcs cluster cib | grep 'type="fence_kdump"') &> /dev/null || return 1
}
# Check if fence_kdump is configured using kdump options
is_generic_fence_kdump()
{
[[ -x $FENCE_KDUMP_SEND ]] || return 1
[[ $(kdump_get_conf_val fence_kdump_nodes) ]]
}
to_dev_name()
{
local dev="${1//\"/}"
case "$dev" in
UUID=*)
blkid -U "${dev#UUID=}"
;;
LABEL=*)
blkid -L "${dev#LABEL=}"
;;
*)
echo "$dev"
;;
esac
}
is_user_configured_dump_target()
{
[[ $(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|nfs\|ssh\|virtiofs") ]] || is_mount_in_dracut_args
}
get_block_dump_target()
{
local _target _fstype
if is_ssh_dump_target || is_nfs_dump_target; then
return
fi
_target=$(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|virtiofs")
[[ -n $_target ]] && to_dev_name "$_target" && return
_target=$(get_dracut_args_target "$(kdump_get_conf_val "dracut_args")")
[[ -b $_target ]] && to_dev_name "$_target" && return
_fstype=$(get_dracut_args_fstype "$(kdump_get_conf_val "dracut_args")")
is_fs_type_virtiofs "$_fstype" && echo "$_target" && return
_target=$(get_target_from_path "$(get_save_path)")
[[ -b $_target ]] && to_dev_name "$_target" && return
_fstype=$(get_fs_type_from_target "$_target")
is_fs_type_virtiofs "$_fstype" && echo "$_target" && return
}
is_dump_to_rootfs()
{
[[ $(kdump_get_conf_val 'failure_action\|default') == dump_to_rootfs ]]
}
is_lvm2_thinp_dump_target()
{
_target=$(get_block_dump_target)
[ -n "$_target" ] && is_lvm2_thinp_device "$_target"
}
get_failure_action_target()
{
local _target
if is_dump_to_rootfs; then
# Get rootfs device name
_target=$(get_root_fs_device)
[[ -b $_target ]] && to_dev_name "$_target" && return
is_fs_type_virtiofs "$(get_fs_type_from_target "$_target")" && echo "$_target" && return
# Then, must be nfs root
echo "nfs"
fi
}
# Get kdump targets(including root in case of dump_to_rootfs).
get_kdump_targets()
{
local _target _root
local kdump_targets
_target=$(get_block_dump_target)
if [[ -n $_target ]]; then
kdump_targets=$_target
elif is_ssh_dump_target; then
kdump_targets="ssh"
else
kdump_targets="nfs"
fi
# Add the root device if dump_to_rootfs is specified.
_root=$(get_failure_action_target)
if [[ -n $_root ]] && [[ $kdump_targets != "$_root" ]]; then
kdump_targets="$kdump_targets $_root"
fi
echo "$kdump_targets"
}
# Return the bind mount source path, return the path itself if it's not bind mounted
# Eg. if /path/to/src is bind mounted to /mnt/bind, then:
# /mnt/bind -> /path/to/src, /mnt/bind/dump -> /path/to/src/dump
#
# findmnt uses the option "-v, --nofsroot" to exclusive the [/dir]
# in the SOURCE column for bind-mounts, then if $_src equals to
# $_src_nofsroot, the mountpoint is not bind mounted directory.
#
# Below is just an example for mount info
# /dev/mapper/atomicos-root[/ostree/deploy/rhel-atomic-host/var], if the
# directory is bind mounted. The former part represents the device path, rest
# part is the bind mounted directory which quotes by bracket "[]".
get_bind_mount_source()
{
local _mnt _path _src _opt _fstype
local _fsroot _src_nofsroot
_mnt=$(df "$1" | tail -1 | awk '{print $NF}')
_path=${1#$_mnt}
_src=$(get_mount_info SOURCE target "$_mnt" -f)
_opt=$(get_mount_info OPTIONS target "$_mnt" -f)
_fstype=$(get_mount_info FSTYPE target "$_mnt" -f)
# bind mount in fstab
if [[ -d $_src ]] && [[ $_fstype == none ]] && (echo "$_opt" | grep -q "\bbind\b"); then
echo "$_src$_path" && return
fi
# direct mount
_src_nofsroot=$(get_mount_info SOURCE target "$_mnt" -v -f)
if [[ $_src_nofsroot == "$_src" ]]; then
echo "$_mnt$_path" && return
fi
_fsroot=${_src#${_src_nofsroot}[}
_fsroot=${_fsroot%]}
_mnt=$(get_mntpoint_from_target "$_src_nofsroot")
# for btrfs, _fsroot will also contain the subvol value as well, strip it
if [[ $_fstype == btrfs ]]; then
local _subvol
_subvol=${_opt#*subvol=}
_subvol=${_subvol%,*}
_fsroot=${_fsroot#$_subvol}
fi
echo "$_mnt$_fsroot$_path"
}
get_mntopt_from_target()
{
get_mount_info OPTIONS source "$1" -f
}
# Get the path where the target will be mounted in kdump kernel
# $1: kdump target device
get_kdump_mntpoint_from_target()
{
local _mntpoint
_mntpoint=$(get_mntpoint_from_target "$1")
# mount under /sysroot if dump to root disk or mount under
# mount under /kdumproot if dump target is not mounted in first kernel
# mount under /kdumproot/$_mntpoint in other cases in 2nd kernel.
# systemd will be in charge to umount it.
if [[ -z $_mntpoint ]]; then
_mntpoint="/kdumproot"
else
if [[ $_mntpoint == "/" ]]; then
_mntpoint="/sysroot"
else
_mntpoint="/kdumproot/$_mntpoint"
fi
fi
# strip duplicated "/"
echo $_mntpoint | tr -s "/"
}
kdump_get_persistent_dev()
{
local dev="${1//\"/}"
case "$dev" in
UUID=*)
dev=$(blkid -U "${dev#UUID=}")
;;
LABEL=*)
dev=$(blkid -L "${dev#LABEL=}")
;;
esac
echo $(get_persistent_dev "$dev")
}
is_ostree()
{
test -f /run/ostree-booted
}
# get ip address or hostname from nfs/ssh config value
get_remote_host()
{
local _config_val=$1
# ipv6 address in kdump.conf is around with "[]",
# factor out the ipv6 address
_config_val=${_config_val#*@}
_config_val=${_config_val%:/*}
_config_val=${_config_val#[}
_config_val=${_config_val%]}
echo "$_config_val"
}
is_hostname()
{
local _hostname
_hostname=$(echo "$1" | grep ":")
if [[ -n $_hostname ]]; then
return 1
fi
echo "$1" | grep -q "[a-zA-Z]"
}
# Copied from "/etc/sysconfig/network-scripts/network-functions"
get_hwaddr()
{
if [[ -f "/sys/class/net/$1/address" ]]; then
awk '{ print toupper($0) }' < "/sys/class/net/$1/address"
elif [[ -d "/sys/class/net/$1" ]]; then
LC_ALL="" LANG="" ip -o link show "$1" 2> /dev/null |
awk '{ print toupper(gensub(/.*link\/[^ ]* ([[:alnum:]:]*).*/,
"\\1", 1)); }'
fi
}
# Get value by a field using "nmcli -g"
# Usage: get_nmcli_value_by_field <field> <nmcli command>
#
# "nmcli --get-values" allows us to retrive value(s) by field, for example,
# nmcli --get-values <field> connection show /org/freedesktop/NetworkManager/ActiveConnection/1
# returns the following value for the corresponding field respectively,
# Field Value
# IP4.DNS "10.19.42.41 | 10.11.5.19 | 10.5.30.160"
# 802-3-ethernet.s390-subchannels ""
# bond.options "mode=balance-rr"
get_nmcli_value_by_field()
{
LANG=C nmcli --get-values "$@"
}
# Get nmcli field value of an connection apath (a D-Bus active connection path)
# Usage: get_nmcli_field_by_apath <field> <apath>
get_nmcli_field_by_conpath()
{
local _field=$1 _apath=$2
get_nmcli_value_by_field "$_field" connection show "$_apath"
}
# Get nmcli connection apath (a D-Bus active connection path ) by ifname
#
# apath is used for nmcli connection operations, e.g.
# $ nmcli connection show $apath
get_nmcli_connection_apath_by_ifname()
{
local _ifname=$1
get_nmcli_value_by_field "GENERAL.CON-PATH" device show "$_ifname"
}
get_ifcfg_by_device()
{
grep -E -i -l "^[[:space:]]*DEVICE=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_hwaddr()
{
grep -E -i -l "^[[:space:]]*HWADDR=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_uuid()
{
grep -E -i -l "^[[:space:]]*UUID=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_name()
{
grep -E -i -l "^[[:space:]]*NAME=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
is_nm_running()
{
[[ "$(LANG=C nmcli -t --fields running general status 2> /dev/null)" == "running" ]]
}
is_nm_handling()
{
LANG=C nmcli -t --fields device,state dev status 2> /dev/null |
grep -q "^\(${1}:connected\)\|\(${1}:connecting.*\)$"
}
# $1: netdev name
get_ifcfg_nmcli()
{
local nm_uuid nm_name
local ifcfg_file
# Get the active nmcli config name of $1
if is_nm_running && is_nm_handling "${1}"; then
# The configuration "uuid" and "name" generated by nm is wrote to
# the ifcfg file as "UUID=<nm_uuid>" and "NAME=<nm_name>".
nm_uuid=$(LANG=C nmcli -t --fields uuid,device c show --active 2> /dev/null |
grep "${1}" | head -1 | cut -d':' -f1)
nm_name=$(LANG=C nmcli -t --fields name,device c show --active 2> /dev/null |
grep "${1}" | head -1 | cut -d':' -f1)
ifcfg_file=$(get_ifcfg_by_uuid "${nm_uuid}")
[[ -z ${ifcfg_file} ]] && ifcfg_file=$(get_ifcfg_by_name "${nm_name}")
fi
echo -n "${ifcfg_file}"
}
# $1: netdev name
get_ifcfg_legacy()
{
local ifcfg_file hwaddr
ifcfg_file="/etc/sysconfig/network-scripts/ifcfg-${1}"
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
ifcfg_file=$(get_ifcfg_by_name "${1}")
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
hwaddr=$(get_hwaddr "${1}")
if [[ -n $hwaddr ]]; then
ifcfg_file=$(get_ifcfg_by_hwaddr "${hwaddr}")
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
fi
ifcfg_file=$(get_ifcfg_by_device "${1}")
echo -n "${ifcfg_file}"
}
# $1: netdev name
# Return the ifcfg file whole name(including the path) of $1 if any.
get_ifcfg_filename()
{
local ifcfg_file
ifcfg_file=$(get_ifcfg_nmcli "${1}")
if [[ -z ${ifcfg_file} ]]; then
ifcfg_file=$(get_ifcfg_legacy "${1}")
fi
echo -n "${ifcfg_file}"
}
# returns 0 when omission of a module is desired in dracut_args
# returns 1 otherwise
is_dracut_mod_omitted()
{
local dracut_args dracut_mod=$1
set -- $(kdump_get_conf_val dracut_args)
while [ $# -gt 0 ]; do
case $1 in
-o | --omit)
[[ " ${2//[^[:alnum:]]/ } " == *" $dracut_mod "* ]] && return 0
;;
esac
shift
done
return 1
}
is_wdt_active()
{
local active
[[ -d /sys/class/watchdog ]] || return 1
for dir in /sys/class/watchdog/*; do
[[ -f "$dir/state" ]] || continue
active=$(< "$dir/state")
[[ $active == "active" ]] && return 0
done
return 1
}
have_compression_in_dracut_args()
{
[[ "$(kdump_get_conf_val dracut_args)" =~ (^|[[:space:]])--(gzip|bzip2|lzma|xz|lzo|lz4|zstd|no-compress|compress|squash-compressor)([[:space:]]|$) ]]
}
# If "dracut_args" contains "--mount" information, use it
# directly without any check(users are expected to ensure
# its correctness).
is_mount_in_dracut_args()
{
[[ " $(kdump_get_conf_val dracut_args)" =~ .*[[:space:]]--mount[=[:space:]].* ]]
}
get_reserved_mem_size()
{
local reserved_mem_size=0
if is_fadump_capable; then
reserved_mem_size=$(< /sys/kernel/fadump/mem_reserved)
else
reserved_mem_size=$(< /sys/kernel/kexec_crash_size)
fi
echo "$reserved_mem_size"
}
check_crash_mem_reserved()
{
local mem_reserved
mem_reserved=$(get_reserved_mem_size)
if [[ $mem_reserved -eq 0 ]]; then
derror "No memory reserved for crash kernel"
return 1
fi
return 0
}
check_kdump_feasibility()
{
if [[ ! -e /sys/kernel/kexec_crash_loaded ]]; then
derror "Kdump is not supported on this kernel"
return 1
fi
check_crash_mem_reserved
return $?
}
is_kernel_loaded()
{
local _sysfs _mode
_mode=$1
case "$_mode" in
kdump)
_sysfs="/sys/kernel/kexec_crash_loaded"
;;
fadump)
_sysfs="$FADUMP_REGISTER_SYS_NODE"
;;
*)
derror "Unknown dump mode '$_mode' provided"
return 1
;;
esac
if [[ ! -f $_sysfs ]]; then
derror "$_mode is not supported on this kernel"
return 1
fi
[[ $(< $_sysfs) -eq 1 ]]
}
#
# This function returns the "apicid" of the boot
# cpu (cpu 0) if present.
#
get_bootcpu_apicid()
{
awk ' \
BEGIN { CPU = "-1"; } \
$1=="processor" && $2==":" { CPU = $NF; } \
CPU=="0" && /^apicid/ { print $NF; } \
' \
/proc/cpuinfo
}
# This function check iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we dont
need_64bit_headers()
{
return "$(tail -n 1 /proc/iomem | awk '{ split ($1, r, "-");
print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }')"
}
# Check if secure boot is being enforced.
#
# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# Assume efivars is mounted at /sys/firmware/efi/efivars.
is_secure_boot_enforced()
{
local secure_boot_file setup_mode_file
local secure_boot_byte setup_mode_byte
# On powerpc, secure boot is enforced if:
# host secure boot: /ibm,secure-boot/os-secureboot-enforcing DT property exists
# guest secure boot: /ibm,secure-boot >= 2
if [[ -f /proc/device-tree/ibm,secureboot/os-secureboot-enforcing ]]; then
return 0
fi
if [[ -f /proc/device-tree/ibm,secure-boot ]] &&
[[ $(lsprop /proc/device-tree/ibm,secure-boot | tail -1) -ge 2 ]]; then
return 0
fi
# Detect secure boot on x86 and arm64
secure_boot_file=$(find /sys/firmware/efi/efivars -name "SecureBoot-*" 2> /dev/null)
setup_mode_file=$(find /sys/firmware/efi/efivars -name "SetupMode-*" 2> /dev/null)
if [[ -f $secure_boot_file ]] && [[ -f $setup_mode_file ]]; then
secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' "$secure_boot_file" | cut -d' ' -f 5)
setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' "$setup_mode_file" | cut -d' ' -f 5)
if [[ $secure_boot_byte == "1" ]] && [[ $setup_mode_byte == "0" ]]; then
return 0
fi
fi
# Detect secure boot on s390x
if [[ -e "/sys/firmware/ipl/secure" && "$(< /sys/firmware/ipl/secure)" == "1" ]]; then
return 0
fi
return 1
}
#
# prepare_kexec_args <kexec args>
# This function prepares kexec argument.
#
prepare_kexec_args()
{
local kexec_args=$1
local found_elf_args
ARCH=$(uname -m)
if [[ $ARCH == "i686" ]] || [[ $ARCH == "i386" ]]; then
need_64bit_headers
if [[ $? == 1 ]]; then
found_elf_args=$(echo "$kexec_args" | grep elf32-core-headers)
if [[ -n $found_elf_args ]]; then
dwarn "Warning: elf32-core-headers overrides correct elf64 setting"
else
kexec_args="$kexec_args --elf64-core-headers"
fi
else
found_elf_args=$(echo "$kexec_args" | grep elf64-core-headers)
if [[ -z $found_elf_args ]]; then
kexec_args="$kexec_args --elf32-core-headers"
fi
fi
fi
# For secureboot enabled machines, use new kexec file based syscall.
# Old syscall will always fail as it does not have capability to do
# kernel signature verification.
if is_secure_boot_enforced; then
dinfo "Secure Boot is enabled. Using kexec file based syscall."
kexec_args="$kexec_args -s"
fi
echo "$kexec_args"
}
# prepare_kdump_kernel <kdump_kernelver>
# This function return kdump_kernel given a kernel version.
prepare_kdump_kernel()
{
local kdump_kernelver=$1
local dir img boot_dirlist boot_imglist kdump_kernel machine_id
read -r machine_id < /etc/machine-id
boot_dirlist=${KDUMP_BOOTDIR:-"/boot /boot/efi /efi /"}
boot_imglist="$KDUMP_IMG-$kdump_kernelver$KDUMP_IMG_EXT \
$machine_id/$kdump_kernelver/$KDUMP_IMG \
EFI/Linux/$machine_id-$kdump_kernelver.efi"
# The kernel of OSTree based systems is not in the standard locations.
if is_ostree; then
boot_dirlist="$(echo /boot/ostree/*) $boot_dirlist"
fi
# Use BOOT_IMAGE as reference if possible, strip the GRUB root device prefix in (hd0,gpt1) format
boot_img="$(grep -P -o '^BOOT_IMAGE=(\S+)' /proc/cmdline | sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\)/\2/")"
if [[ "$boot_img" == *"$kdump_kernelver" ]]; then
boot_imglist="$boot_img $boot_imglist"
fi
for dir in $boot_dirlist; do
for img in $boot_imglist; do
if [[ -f "$dir/$img" ]]; then
kdump_kernel=$(echo "$dir/$img" | tr -s '/')
break 2
fi
done
done
echo "$kdump_kernel"
}
_is_valid_kver()
{
[[ -f /usr/lib/modules/$1/modules.dep ]]
}
# This function is introduced since 64k variant may be installed on 4k or vice versa
# $1 the kernel path name.
parse_kver_from_path()
{
local _img _kver
[[ -z "$1" ]] && return
_img=$1
BLS_ENTRY_TOKEN=$(</etc/machine-id)
# Fedora standard installation, i.e. $BOOT/vmlinuz-<version>
_kver=${_img##*/vmlinuz-}
_kver=${_kver%"$KDUMP_IMG_EXT"}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
# BLS recommended image names, i.e. $BOOT/<token>/<version>/linux
_kver=${_img##*/"$BLS_ENTRY_TOKEN"/}
_kver=${_kver%%/*}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
# Fedora UKI installation, i.e. $BOOT/efi/EFI/Linux/<token>-<version>.efi
_kver=${_img##*/"$BLS_ENTRY_TOKEN"-}
_kver=${_kver%.efi}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
ddebug "Could not parse version from $_img"
}
_get_kdump_kernel_version()
{
local _version _version_nondebug
if [[ -n "$KDUMP_KERNELVER" ]]; then
echo "$KDUMP_KERNELVER"
return
fi
_version=$(uname -r)
if [[ ! "$_version" =~ [+|-]debug$ ]]; then
echo "$_version"
return
fi
_version_nondebug=${_version%+debug}
_version_nondebug=${_version_nondebug%-debug}
if [[ -f "$(prepare_kdump_kernel "$_version_nondebug")" ]]; then
dinfo "Use of debug kernel detected. Trying to use $_version_nondebug"
echo "$_version_nondebug"
else
dinfo "Use of debug kernel detected but cannot find $_version_nondebug. Falling back to $_version"
echo "$_version"
fi
}
#
# Detect initrd and kernel location, results are stored in global environmental variables:
# KDUMP_BOOTDIR, KDUMP_KERNELVER, KDUMP_KERNEL, DEFAULT_INITRD, and KDUMP_INITRD
#
# Expectes KDUMP_BOOTDIR, KDUMP_IMG, KDUMP_IMG_EXT, KDUMP_KERNELVER to be loaded from config already
# and will prefer already set values so user can specify custom kernel/initramfs location
#
prepare_kdump_bootinfo()
{
local boot_initrdlist default_initrd_base var_target_initrd_dir
KDUMP_KERNELVER=$(_get_kdump_kernel_version)
KDUMP_KERNEL=$(prepare_kdump_kernel "$KDUMP_KERNELVER")
if ! [[ -e $KDUMP_KERNEL ]]; then
derror "Failed to detect kdump kernel location"
return 1
fi
# For 64k variant, e.g. vmlinuz-5.14.0-327.el9.aarch64+64k-debug
if [[ "$KDUMP_KERNEL" == *"+debug" || "$KDUMP_KERNEL" == *"64k-debug" ]]; then
dwarn "Using debug kernel, you may need to set a larger crashkernel than the default value."
fi
# Set KDUMP_BOOTDIR to where kernel image is stored
if is_uki "$KDUMP_KERNEL"; then
KDUMP_BOOTDIR=/boot
else
KDUMP_BOOTDIR=$(dirname "$KDUMP_KERNEL")
fi
# Default initrd should just stay aside of kernel image, try to find it in KDUMP_BOOTDIR
boot_initrdlist="initramfs-$KDUMP_KERNELVER.img initrd"
for initrd in $boot_initrdlist; do
if [[ -f "$KDUMP_BOOTDIR/$initrd" ]]; then
default_initrd_base="$initrd"
DEFAULT_INITRD="$KDUMP_BOOTDIR/$default_initrd_base"
break
fi
done
# Create kdump initrd basename from default initrd basename
# initramfs-5.7.9-200.fc32.x86_64.img => initramfs-5.7.9-200.fc32.x86_64kdump.img
# initrd => initrdkdump
if [[ -z $default_initrd_base ]]; then
kdump_initrd_base=initramfs-${KDUMP_KERNELVER}kdump.img
elif [[ $default_initrd_base == *.* ]]; then
kdump_initrd_base=${default_initrd_base%.*}kdump.${DEFAULT_INITRD##*.}
else
kdump_initrd_base=${default_initrd_base}kdump
fi
# Place kdump initrd in $(/var/lib/kdump) if $(KDUMP_BOOTDIR) not writable
if [[ ! -w $KDUMP_BOOTDIR ]]; then
var_target_initrd_dir="/var/lib/kdump"
mkdir -p "$var_target_initrd_dir"
KDUMP_INITRD="$var_target_initrd_dir/$kdump_initrd_base"
else
KDUMP_INITRD="$KDUMP_BOOTDIR/$kdump_initrd_base"
fi
}
get_watchdog_drvs()
{
local _wdtdrvs _drv _dir
for _dir in /sys/class/watchdog/*; do
# device/modalias will return driver of this device
[[ -f "$_dir/device/modalias" ]] || continue
_drv=$(< "$_dir/device/modalias")
_drv=$(modprobe --set-version "$KDUMP_KERNELVER" -R "$_drv" 2> /dev/null)
for i in $_drv; do
if ! [[ " $_wdtdrvs " == *" $i "* ]]; then
_wdtdrvs="$_wdtdrvs $i"
fi
done
done
echo "$_wdtdrvs"
}
_cmdline_parse()
{
local opt val
while read -r opt; do
if [[ $opt =~ = ]]; then
val=${opt#*=}
opt=${opt%%=*}
# ignore options like 'foo='
[[ -z $val ]] && continue
# xargs removes quotes, add them again
[[ $val =~ [[:space:]] ]] && val="\"$val\""
else
val=""
fi
echo "$opt $val"
done <<< "$(echo "$1" | xargs -n 1 echo)"
}
#
# prepare_cmdline <commandline> <commandline remove> <commandline append>
# This function performs a series of edits on the command line.
# Store the final result in global $KDUMP_COMMANDLINE.
prepare_cmdline()
{
local in out append opt val id drv
local -A remove
in=${1:-$(< /proc/cmdline)}
while read -r opt val; do
[[ -n "$opt" ]] || continue
remove[$opt]=1
done <<< "$(_cmdline_parse "$2")"
append=$3
# These params should always be removed
remove[crashkernel]=1
remove[panic_on_warn]=1
# Always remove "root=X", as we now explicitly generate all kinds
# of dump target mount information including root fs.
#
# We do this before KDUMP_COMMANDLINE_APPEND, if one really cares
# about it(e.g. for debug purpose), then can pass "root=X" using
# KDUMP_COMMANDLINE_APPEND.
remove[root]=1
# With the help of "--hostonly-cmdline", we can avoid some interitage.
remove[rd.lvm.lv]=1
remove[rd.luks.uuid]=1
remove[rd.dm.uuid]=1
remove[rd.md.uuid]=1
remove[fcoe]=1
# Remove netroot, rd.iscsi.initiator and iscsi_initiator since
# we get duplicate entries for the same in case iscsi code adds
# it as well.
remove[netroot]=1
remove[rd.iscsi.initiator]=1
remove[iscsi_initiator]=1
while read -r opt val; do
[[ -n "$opt" ]] || continue
[[ -n "${remove[$opt]}" ]] && continue
if [[ -n "$val" ]]; then
out+="$opt=$val "
else
out+="$opt "
fi
done <<< "$(_cmdline_parse "$in")"
out+="$append "
id=$(get_bootcpu_apicid)
if [[ -n "${id}" ]]; then
out+="disable_cpu_apicid=$id "
fi
# If any watchdog is used, set it's pretimeout to 0. pretimeout let
# watchdog panic the kernel first, and reset the system after the
# panic. If the system is already in kdump, panic is not helpful
# and only increase the chance of watchdog failure.
for drv in $(get_watchdog_drvs); do
out+="$drv.pretimeout=0 "
if [[ $drv == hpwdt ]]; then
# hpwdt have a special parameter kdumptimeout, it is
# only supposed to be set to non-zero in first kernel.
# In kdump, non-zero value could prevent the watchdog
# from resetting the system.
out+="$drv.kdumptimeout=0 "
fi
done
# Always disable gpt-auto-generator as it hangs during boot of the
# crash kernel. Furthermore we know which disk will be used for dumping
# (if at all) and add it explicitly.
is_uki "$KDUMP_KERNEL" && out+="rd.systemd.gpt_auto=no "
# Trim unnecessary whitespaces
echo "$out" | sed -e "s/^ *//g" -e "s/ *$//g" -e "s/ \+/ /g"
}
PROC_IOMEM=/proc/iomem
#get system memory size i.e. memblock.memory.total_size in the unit of GB
get_system_size()
{
sum=$(sed -n "s/\s*\([0-9a-fA-F]\+\)-\([0-9a-fA-F]\+\) : System RAM$/+ 0x\2 - 0x\1 + 1/p" $PROC_IOMEM)
echo $(( (sum) / 1024 / 1024 / 1024))
}
# Return the recommended size for the reserved crashkernel memory
# depending on the system memory size.
#
# This functions is expected to be consistent with the parse_crashkernel_mem()
# in kernel i.e. how kernel allocates the kdump memory given the crashkernel
# parameter crashkernel=range1:size1[,range2:size2,…] and the system memory
# size.
get_recommend_size()
{
local mem_size=$1
local _ck_cmdline=$2
local range start start_unit end end_unit size
while read -r -d , range; do
# need to use non-default IFS as double spaces are used as a
# single delimiter while commas aren't...
IFS=, read start start_unit end end_unit size <<< \
"$(echo "$range" | sed -n "s/\([0-9]\+\)\([GT]\?\)-\([0-9]*\)\([GT]\?\):\([0-9]\+[MG]\)/\1,\2,\3,\4,\5/p")"
# aka. 102400T
end=${end:-104857600}
[[ "$end_unit" == T ]] && end=$((end * 1024))
[[ "$start_unit" == T ]] && start=$((start * 1024))
if [[ $mem_size -ge $start ]] && [[ $mem_size -lt $end ]]; then
echo "$size"
return
fi
# append a ',' as read expects the 'file' to end with a delimiter
done <<< "$_ck_cmdline,"
# no matching range found
echo "0M"
}
has_mlx5()
{
[[ -d /sys/bus/pci/drivers/mlx5_core ]]
}
has_aarch64_smmu()
{
ls /sys/devices/platform/arm-smmu-* 1> /dev/null 2>&1
}
is_memsize() { [[ "$1" =~ ^[+-]?[0-9]+[KkMmGgTtPbEe]?$ ]]; }
# range defined for crashkernel parameter
# i.e. <start>-[<end>]
is_memrange()
{
is_memsize "${1%-*}" || return 1
[[ -n ${1#*-} ]] || return 0
is_memsize "${1#*-}"
}
to_bytes()
{
local _s
_s="$1"
is_memsize "$_s" || return 1
case "${_s: -1}" in
K|k)
_s=${_s::-1}
_s="$((_s * 1024))"
;;
M|m)
_s=${_s::-1}
_s="$((_s * 1024 * 1024))"
;;
G|g)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024))"
;;
T|t)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024))"
;;
P|p)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024))"
;;
E|e)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024 * 1024))"
;;
*)
;;
esac
echo "$_s"
}
memsize_add()
{
local -a units=("" "K" "M" "G" "T" "P" "E")
local i a b
a=$(to_bytes "$1") || return 1
b=$(to_bytes "$2") || return 1
i=0
(( a += b ))
while :; do
[[ $(( a / 1024 )) -eq 0 ]] && break
[[ $(( a % 1024 )) -ne 0 ]] && break
[[ $(( ${#units[@]} - 1 )) -eq $i ]] && break
(( a /= 1024 ))
(( i += 1 ))
done
echo "${a}${units[$i]}"
}
_crashkernel_parse()
{
local ck entry
local range size offset
ck="$1"
if [[ "$ck" == *@* ]]; then
offset="@${ck##*@}"
ck=${ck%@*}
elif [[ "$ck" == *,high ]] || [[ "$ck" == *,low ]]; then
offset=",${ck##*,}"
ck=${ck%,*}
else
offset=''
fi
while read -d , -r entry; do
[[ -n "$entry" ]] || continue
if [[ "$entry" == *:* ]]; then
range=${entry%:*}
size=${entry#*:}
else
range=""
size=${entry}
fi
echo "$size;$range;"
done <<< "$ck,"
echo ";;$offset"
}
# $1 crashkernel command line parameter
# $2 size to be added
_crashkernel_add()
{
local ck delta ret
local range size offset
ck="$1"
delta="$2"
ret=""
while IFS=';' read -r size range offset; do
if [[ -n "$offset" ]]; then
ret="${ret%,}$offset"
break
fi
[[ -n "$size" ]] || continue
if [[ -n "$range" ]]; then
is_memrange "$range" || return 1
ret+="$range:"
fi
size=$(memsize_add "$size" "$delta") || return 1
ret+="$size,"
done < <( _crashkernel_parse "$ck")
echo "${ret%,}"
}
# get default crashkernel
# $1 dump mode, if not specified, dump_mode will be judged by is_fadump_capable
# $2 kernel-release, if not specified, got by _get_kdump_kernel_version
kdump_get_arch_recommend_crashkernel()
{
local _arch _ck_cmdline _dump_mode
local _delta=0
if [[ -z "$1" ]]; then
if is_fadump_capable; then
_dump_mode=fadump
else
_dump_mode=kdump
fi
else
_dump_mode=$1
fi
_arch=$(uname -m)
if [[ $_arch == "x86_64" ]] || [[ $_arch == "s390x" ]]; then
_ck_cmdline="1G-4G:192M,4G-64G:256M,64G-:512M"
is_sme_or_sev_active && ((_delta += 64))
elif [[ $_arch == "aarch64" ]]; then
local _running_kernel
# Base line for 4K variant kernel. The formula is based on x86 plus extra = 64M
_ck_cmdline="1G-4G:256M,4G-64G:320M,64G-:576M"
if [[ -z "$2" ]]; then
_running_kernel=$(_get_kdump_kernel_version)
else
_running_kernel=$2
fi
# the naming convention of 64k variant suffixes with +64k, e.g. "vmlinuz-5.14.0-312.el9.aarch64+64k"
if echo "$_running_kernel" | grep -q 64k; then
# Without smmu, the diff of MemFree between 4K and 64K measured on a high end aarch64 machine is 82M.
# Picking up 100M to cover this diff. And finally, we have "1G-4G:356M;4G-64G:420M;64G-:676M"
((_delta += 100))
# On a 64K system, the extra 384MB is calculated by: cmdq_num * 16 bytes + evtq_num * 32B + priq_num * 16B
# While on a 4K system, it is negligible
has_aarch64_smmu && ((_delta += 384))
#64k kernel, mlx5 consumes extra 188M memory, and choose 200M
has_mlx5 && ((_delta += 200))
else
#4k kernel, mlx5 consumes extra 124M memory, and choose 150M
has_mlx5 && ((_delta += 150))
fi
elif [[ $_arch == "ppc64le" ]]; then
if [[ $_dump_mode == "fadump" ]]; then
_ck_cmdline="4G-16G:768M,16G-64G:1G,64G-128G:2G,128G-1T:4G,1T-2T:6G,2T-4T:12G,4T-8T:20G,8T-16T:36G,16T-32T:64G,32T-64T:128G,64T-:180G"
else
_ck_cmdline="2G-4G:384M,4G-16G:512M,16G-64G:1G,64G-128G:2G,128G-:4G"
fi
fi
echo -n "$(_crashkernel_add "$_ck_cmdline" "${_delta}M")"
}
# return recommended size based on current system RAM size
# $1: kernel version, if not set, will defaults to $(uname -r)
kdump_get_arch_recommend_size()
{
local _ck_cmdline _sys_mem
if ! [[ -r "/proc/iomem" ]]; then
echo "Error, can not access /proc/iomem."
return 1
fi
_sys_mem=$(get_system_size)
_ck_cmdline=$(kdump_get_arch_recommend_crashkernel)
_ck_cmdline=${_ck_cmdline//-:/-102400T:}
get_recommend_size "$_sys_mem" "$_ck_cmdline"
}
# Print all underlying crypt devices of a block device
# print nothing if device is not on top of a crypt device
# $1: the block device to be checked in maj:min format
get_luks_crypt_dev()
{
local _type
[[ -b /dev/block/$1 ]] || return 1
_type=$(blkid -u filesystem,crypto -o export -- "/dev/block/$1" | \
sed -n -E "s/^TYPE=(.*)$/\1/p")
[[ $_type == "crypto_LUKS" ]] && echo "$1"
for _x in "/sys/dev/block/$1/slaves/"*; do
[[ -f $_x/dev ]] || continue
[[ $_x/subsystem -ef /sys/class/block ]] || continue
get_luks_crypt_dev "$(< "$_x/dev")"
done
}
# kdump_get_maj_min <device>
# Prints the major and minor of a device node.
# Example:
# $ get_maj_min /dev/sda2
# 8:2
kdump_get_maj_min()
{
local _majmin
_majmin="$(stat -L -c '%t:%T' "$1" 2> /dev/null)"
printf "%s" "$((0x${_majmin%:*})):$((0x${_majmin#*:}))"
}
get_all_kdump_crypt_dev()
{
local _dev
for _dev in $(get_block_dump_target); do
get_luks_crypt_dev "$(kdump_get_maj_min "$_dev")"
done
}