kexec-tools/kdump-lib.sh

1249 lines
31 KiB
Bash
Raw Normal View History

#!/bin/bash
#
# Kdump common variables and functions
#
. /usr/lib/kdump/kdump-lib-initramfs.sh
FADUMP_ENABLED_SYS_NODE="/sys/kernel/fadump/enabled"
FADUMP_REGISTER_SYS_NODE="/sys/kernel/fadump/registered"
kdumpctl: Add basic UKI support Resolves: bz2169720 Upstream: src.fedoraproject.org/rpms/kexec-tools.git Conflicts: Small context difference in kexec-tools.spec commit ea7be0608ed719cc1cb134ecf6ef51a4b7e9f104 Author: Philipp Rudo <prudo@redhat.com> Date: Fri May 5 17:14:42 2023 +0200 kdumpctl: Add basic UKI support A Unified Kernel Image (UKI) is a single EFI PE executable combining an EFI stub, a kernel image, an initrd image, and the kernel command line. They are defined in the Boot Loader Specification [1] as type #2 entries. UKIs have the advantage that all code as well as meta data that is required to boot the system, not only the kernel image, is combined in a single PE file and can be signed for EFI SecureBoot. This extends the coverage of SecureBoot extensively. For RHEL support for UKI were included into kernel-ark with 16c7e3ee836e ("redhat: Add sub-RPM with a EFI unified kernel image for virtual machines"). There are two problems with UKIs from the kdump point of view at the moment. First, they cannot be directly loaded via kexec_file_load and second, the initrd included isn't suitable for kdump. In order to enable kdump on systems with UKIs build the kdump initrd as usual and extract the kernel image before loading the crash kernel. [1] https://uapi-group.org/specifications/specs/boot_loader_specification/ Signed-off-by: Philipp Rudo <prudo@redhat.com> Reviewed-by: Pingfan Liu <piliu@redhat.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2023-05-31 07:11:31 +00:00
is_uki()
{
local img
img="$1"
[[ -f "$img" ]] || return
[[ "$(file -b --mime-type "$img")" == application/x-dosexec ]] || return
objdump -h -j .linux "$img" &> /dev/null
}
is_fadump_capable()
{
# Check if firmware-assisted dump is enabled
# if no, fallback to kdump check
if [[ -f $FADUMP_ENABLED_SYS_NODE ]]; then
rc=$(< $FADUMP_ENABLED_SYS_NODE)
[[ $rc -eq 1 ]] && return 0
fi
return 1
}
kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active Resolves: https://issues.redhat.com/browse/RHEL-8727 Resolves: https://issues.redhat.com/browse/RHEL-8710 Upstream: Fedora Conflict: The 1st hunk is merged manualy because of conflict caused by context. commit 4841bc6a6dfe4d09640322f6703f60dbfe9f6f19 Author: Baoquan He <bhe@redhat.com> Date: Thu Sep 21 18:01:02 2023 +0800 kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active It's reported that kdump kernel failed to boot and can't dump vmcore when crashkernel=192M and SME/SEV is active. This is because swiotlb will be enabled and reserves 64M memory by default on system with SME/SEV enabled. Then kdump kernel will be out of memory after taking 64M away for swiotlb init. So here add extra 64M memory to default crashkernel value so that kdump kernel can function well as before. When doing that, search journalctl for the "Memory Encryption Features active: AMD" to check if SME or SEV is active. This line of log is printed out in kernel function as below and the type SME is mutual exclusive with type SEV. ***: arch/x86/mm/mem_encrypt.c:print_mem_encrypt_feature_info() Note: 1) The conditional check is relying on journalctl log because I didn't find available system interface to check if SEV is active. Even though we can check if SME is active via /proc/cpuinfo. For consistency, I take the same check for both SME and SEV by searching journalctl. 2) The conditional check is relying on journalctl log, means it won't work for crashkernel setting in anoconda because the installation kernel doesn't have the SME/SEV setting. So customer need manually run 'kdumpctl reset-crashkernel' to reset crashkernel to add the extra 64M after OS installation. 3) We need watch the line of log printing in print_mem_encrypt_feature_info() in kernel just in case people may change it in the future. Signed-off-by: Baoquan He <bhe@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Baoquan He <bhe@redhat.com>
2023-09-21 10:01:02 +00:00
is_sme_or_sev_active()
{
journalctl -q --dmesg --grep "^Memory Encryption Features active: AMD (SME|SEV)$" >/dev/null 2>&1
}
is_squash_available()
{
local _version kmodule
_version=$(_get_kdump_kernel_version)
for kmodule in squashfs overlay loop; do
modprobe -S "$_version" --dry-run $kmodule &> /dev/null || return 1
done
2021-07-12 08:07:05 +00:00
}
is_zstd_command_available()
{
[[ -x "$(command -v zstd)" ]]
}
Seperate dracut and dracut-squash compressor for zstd Upstream: fedora Resolves: bz2045949 Resolves: bz2044804 Conflict: none commit fc1c79ffd21e7bcb3a710368cd7023c9a634258e Author: Tao Liu <ltao@redhat.com> Date: Sat Oct 8 12:09:08 2022 +0800 Seperate dracut and dracut-squash compressor for zstd Previously kexec-tools will pass "--compress zstd" to dracut. It will make dracut to decide whether: a) call mksquashfs to make a zstd format squash-root.img, b) call cmd zstd to make a initramfs. Since dracut(>= 057) has decoupled the compressor for dracut and dracut-squash, So in this patch, we will pass the compressor seperately. Note: The is_squash_available && !dracut_has_option --squash-compressor && !is_zsdt_command_available case is left unprocessed on purpose. Actually, the situation when we want to call zstd compression is: 1) If squash function OK, we want dracut to invoke mksquashfs to make a zstd format squash-root.img within initramfs. 2) If squash function is not OK, and cmd zstd presents, we want dracut to invoke cmd zstd to make a zstd format initramfs. is_zstd_command_available check can handle case 2 completely. However, for the is_squash_available check, it cannot handle case 1 completely. It only checks if the kernel supports squashfs, it doesn't check whether the squash module has been added by dracut when making initramfs. In fact, in kexec-tools we are unable to do the check, there are multiple ways to forbit dracut to load a module, such as "dracut -o module" and "omit_dracutmodules in dracut.conf". When squash dracut module is omitted, is_squash_available check will still pass, so "--compress zstd" will be appended to dracut cmdline, and it will call cmd zstd to do the compression. However cmd zstd may not exist, so it fails. The previous "--compress zstd" is ambiguous, after the intro of "--squash-compressor", "--squash-compressor" only effect for mksquashfs and "--compress" only effect for specific cmd. So for the is_squash_available && !dracut_has_option --squash-compressor && !is_zsdt_command_available case, we just leave it to be handled the default way. Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2022-10-26 02:16:16 +00:00
dracut_have_option()
{
local _option=$1
! dracut "$_option" 2>&1 | grep -q "unrecognized option"
}
perror_exit()
{
derror "$@"
exit 1
}
# Check if fence kdump is configured in Pacemaker cluster
is_pcs_fence_kdump()
{
# no pcs or fence_kdump_send executables installed?
type -P pcs > /dev/null || return 1
[[ -x $FENCE_KDUMP_SEND ]] || return 1
# fence kdump not configured?
(pcs cluster cib | grep 'type="fence_kdump"') &> /dev/null || return 1
}
# Check if fence_kdump is configured using kdump options
is_generic_fence_kdump()
{
[[ -x $FENCE_KDUMP_SEND ]] || return 1
[[ $(kdump_get_conf_val fence_kdump_nodes) ]]
}
to_dev_name()
{
local dev="${1//\"/}"
case "$dev" in
UUID=*)
blkid -U "${dev#UUID=}"
;;
LABEL=*)
blkid -L "${dev#LABEL=}"
;;
*)
echo "$dev"
;;
esac
}
is_user_configured_dump_target()
{
[[ $(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|nfs\|ssh\|virtiofs") ]] || is_mount_in_dracut_args
}
get_block_dump_target()
{
local _target _fstype
if is_ssh_dump_target || is_nfs_dump_target; then
return
fi
_target=$(kdump_get_conf_val "ext[234]\|xfs\|btrfs\|minix\|raw\|virtiofs")
[[ -n $_target ]] && to_dev_name "$_target" && return
_target=$(get_dracut_args_target "$(kdump_get_conf_val "dracut_args")")
[[ -b $_target ]] && to_dev_name "$_target" && return
_fstype=$(get_dracut_args_fstype "$(kdump_get_conf_val "dracut_args")")
is_fs_type_virtiofs "$_fstype" && echo "$_target" && return
_target=$(get_target_from_path "$(get_save_path)")
[[ -b $_target ]] && to_dev_name "$_target" && return
_fstype=$(get_fs_type_from_target "$_target")
is_fs_type_virtiofs "$_fstype" && echo "$_target" && return
}
is_dump_to_rootfs()
{
[[ $(kdump_get_conf_val 'failure_action\|default') == dump_to_rootfs ]]
}
is_lvm2_thinp_dump_target()
{
_target=$(get_block_dump_target)
[ -n "$_target" ] && is_lvm2_thinp_device "$_target"
}
get_failure_action_target()
{
local _target
if is_dump_to_rootfs; then
# Get rootfs device name
_target=$(get_root_fs_device)
[[ -b $_target ]] && to_dev_name "$_target" && return
is_fs_type_virtiofs "$(get_fs_type_from_target "$_target")" && echo "$_target" && return
# Then, must be nfs root
echo "nfs"
fi
}
# Get kdump targets(including root in case of dump_to_rootfs).
get_kdump_targets()
{
local _target _root
local kdump_targets
_target=$(get_block_dump_target)
if [[ -n $_target ]]; then
kdump_targets=$_target
elif is_ssh_dump_target; then
kdump_targets="ssh"
else
kdump_targets="nfs"
fi
# Add the root device if dump_to_rootfs is specified.
_root=$(get_failure_action_target)
if [[ -n $_root ]] && [[ $kdump_targets != "$_root" ]]; then
kdump_targets="$kdump_targets $_root"
fi
echo "$kdump_targets"
}
# Return the bind mount source path, return the path itself if it's not bind mounted
# Eg. if /path/to/src is bind mounted to /mnt/bind, then:
# /mnt/bind -> /path/to/src, /mnt/bind/dump -> /path/to/src/dump
#
# findmnt uses the option "-v, --nofsroot" to exclusive the [/dir]
# in the SOURCE column for bind-mounts, then if $_src equals to
# $_src_nofsroot, the mountpoint is not bind mounted directory.
#
# Below is just an example for mount info
# /dev/mapper/atomicos-root[/ostree/deploy/rhel-atomic-host/var], if the
# directory is bind mounted. The former part represents the device path, rest
# part is the bind mounted directory which quotes by bracket "[]".
get_bind_mount_source()
{
local _mnt _path _src _opt _fstype
local _fsroot _src_nofsroot
_mnt=$(df "$1" | tail -1 | awk '{print $NF}')
_path=${1#$_mnt}
_src=$(get_mount_info SOURCE target "$_mnt" -f)
_opt=$(get_mount_info OPTIONS target "$_mnt" -f)
_fstype=$(get_mount_info FSTYPE target "$_mnt" -f)
# bind mount in fstab
if [[ -d $_src ]] && [[ $_fstype == none ]] && (echo "$_opt" | grep -q "\bbind\b"); then
echo "$_src$_path" && return
fi
# direct mount
_src_nofsroot=$(get_mount_info SOURCE target "$_mnt" -v -f)
if [[ $_src_nofsroot == "$_src" ]]; then
echo "$_mnt$_path" && return
fi
_fsroot=${_src#${_src_nofsroot}[}
_fsroot=${_fsroot%]}
_mnt=$(get_mount_info TARGET source "$_src_nofsroot" -f)
# for btrfs, _fsroot will also contain the subvol value as well, strip it
if [[ $_fstype == btrfs ]]; then
local _subvol
_subvol=${_opt#*subvol=}
_subvol=${_subvol%,*}
_fsroot=${_fsroot#$_subvol}
fi
echo "$_mnt$_fsroot$_path"
}
get_mntopt_from_target()
{
get_mount_info OPTIONS source "$1" -f
}
# Get the path where the target will be mounted in kdump kernel
# $1: kdump target device
get_kdump_mntpoint_from_target()
{
local _mntpoint
_mntpoint=$(get_mntpoint_from_target "$1")
# mount under /sysroot if dump to root disk or mount under
# mount under /kdumproot if dump target is not mounted in first kernel
# mount under /kdumproot/$_mntpoint in other cases in 2nd kernel.
# systemd will be in charge to umount it.
if [[ -z $_mntpoint ]]; then
_mntpoint="/kdumproot"
else
if [[ $_mntpoint == "/" ]]; then
_mntpoint="/sysroot"
else
_mntpoint="/kdumproot/$_mntpoint"
fi
fi
# strip duplicated "/"
echo $_mntpoint | tr -s "/"
}
kdump_get_persistent_dev()
{
local dev="${1//\"/}"
case "$dev" in
UUID=*)
dev=$(blkid -U "${dev#UUID=}")
;;
LABEL=*)
dev=$(blkid -L "${dev#LABEL=}")
;;
esac
echo $(get_persistent_dev "$dev")
}
is_ostree()
{
test -f /run/ostree-booted
}
# get ip address or hostname from nfs/ssh config value
get_remote_host()
{
local _config_val=$1
# ipv6 address in kdump.conf is around with "[]",
# factor out the ipv6 address
_config_val=${_config_val#*@}
_config_val=${_config_val%:/*}
_config_val=${_config_val#[}
_config_val=${_config_val%]}
echo "$_config_val"
}
is_hostname()
{
local _hostname
_hostname=$(echo "$1" | grep ":")
if [[ -n $_hostname ]]; then
return 1
fi
echo "$1" | grep -q "[a-zA-Z]"
}
# Copied from "/etc/sysconfig/network-scripts/network-functions"
get_hwaddr()
{
if [[ -f "/sys/class/net/$1/address" ]]; then
awk '{ print toupper($0) }' < "/sys/class/net/$1/address"
elif [[ -d "/sys/class/net/$1" ]]; then
LC_ALL="" LANG="" ip -o link show "$1" 2> /dev/null |
awk '{ print toupper(gensub(/.*link\/[^ ]* ([[:alnum:]:]*).*/,
"\\1", 1)); }'
fi
}
# Get value by a field using "nmcli -g"
kdump-lib.sh: rework nmcli related functions upstream: fedora resolves: bz2003832 conflict: none commit 58d3e6db3a2f066ea8e48fe9a8c77a6bb8ca7159 Author: Kairui Song <kasong@redhat.com> Date: Wed Sep 8 15:20:42 2021 +0800 kdump-lib.sh: rework nmcli related functions This fixes word splitting issue with nmcli args. Current kexec-tools scripts won't call nmcli with correct arguments when there are space in network interface name. nmcli expects multiple parameters, but get_nmcli_value_by_field only accepts two params and depends on shell word splitting to split the _nm_show_cmd into multiple params, which is very fragile. So switch the param order, simplified this function and now multiple params can be used properly. And get_nmcli_connection_show_cmd_by_ifname returns multiple nmcli params in a single variable, it depend on shell word splitting to split the words when calling nmcli. But this is very fragile and break easily when there are any special character in the connection path. This function is only introduced to get and cache the nmcli command which contains the "connection name". Actually only cache the "connection path" is enough. Callers should just call get_nmcli_connection_apath_by_ifname to cache the path, and a new helper get_nmcli_field_by_conpath is introduced here to get value from nmcli. This way "connection path" can contain any character. Also get rid of another nmcli_cmd usage in get_nmcli_connection_apath_by_ifname which stores multiple params in a single bash variable separated by space. Signed-off-by: Kairui Song <kasong@redhat.com> Acked-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2021-11-03 11:23:41 +00:00
# Usage: get_nmcli_value_by_field <field> <nmcli command>
#
# "nmcli --get-values" allows us to retrive value(s) by field, for example,
# nmcli --get-values <field> connection show /org/freedesktop/NetworkManager/ActiveConnection/1
# returns the following value for the corresponding field respectively,
# Field Value
# IP4.DNS "10.19.42.41 | 10.11.5.19 | 10.5.30.160"
# 802-3-ethernet.s390-subchannels ""
# bond.options "mode=balance-rr"
get_nmcli_value_by_field()
{
LANG=C nmcli --get-values "$@"
kdump-lib.sh: rework nmcli related functions upstream: fedora resolves: bz2003832 conflict: none commit 58d3e6db3a2f066ea8e48fe9a8c77a6bb8ca7159 Author: Kairui Song <kasong@redhat.com> Date: Wed Sep 8 15:20:42 2021 +0800 kdump-lib.sh: rework nmcli related functions This fixes word splitting issue with nmcli args. Current kexec-tools scripts won't call nmcli with correct arguments when there are space in network interface name. nmcli expects multiple parameters, but get_nmcli_value_by_field only accepts two params and depends on shell word splitting to split the _nm_show_cmd into multiple params, which is very fragile. So switch the param order, simplified this function and now multiple params can be used properly. And get_nmcli_connection_show_cmd_by_ifname returns multiple nmcli params in a single variable, it depend on shell word splitting to split the words when calling nmcli. But this is very fragile and break easily when there are any special character in the connection path. This function is only introduced to get and cache the nmcli command which contains the "connection name". Actually only cache the "connection path" is enough. Callers should just call get_nmcli_connection_apath_by_ifname to cache the path, and a new helper get_nmcli_field_by_conpath is introduced here to get value from nmcli. This way "connection path" can contain any character. Also get rid of another nmcli_cmd usage in get_nmcli_connection_apath_by_ifname which stores multiple params in a single bash variable separated by space. Signed-off-by: Kairui Song <kasong@redhat.com> Acked-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2021-11-03 11:23:41 +00:00
}
kdump-lib.sh: rework nmcli related functions upstream: fedora resolves: bz2003832 conflict: none commit 58d3e6db3a2f066ea8e48fe9a8c77a6bb8ca7159 Author: Kairui Song <kasong@redhat.com> Date: Wed Sep 8 15:20:42 2021 +0800 kdump-lib.sh: rework nmcli related functions This fixes word splitting issue with nmcli args. Current kexec-tools scripts won't call nmcli with correct arguments when there are space in network interface name. nmcli expects multiple parameters, but get_nmcli_value_by_field only accepts two params and depends on shell word splitting to split the _nm_show_cmd into multiple params, which is very fragile. So switch the param order, simplified this function and now multiple params can be used properly. And get_nmcli_connection_show_cmd_by_ifname returns multiple nmcli params in a single variable, it depend on shell word splitting to split the words when calling nmcli. But this is very fragile and break easily when there are any special character in the connection path. This function is only introduced to get and cache the nmcli command which contains the "connection name". Actually only cache the "connection path" is enough. Callers should just call get_nmcli_connection_apath_by_ifname to cache the path, and a new helper get_nmcli_field_by_conpath is introduced here to get value from nmcli. This way "connection path" can contain any character. Also get rid of another nmcli_cmd usage in get_nmcli_connection_apath_by_ifname which stores multiple params in a single bash variable separated by space. Signed-off-by: Kairui Song <kasong@redhat.com> Acked-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2021-11-03 11:23:41 +00:00
# Get nmcli field value of an connection apath (a D-Bus active connection path)
# Usage: get_nmcli_field_by_apath <field> <apath>
get_nmcli_field_by_conpath()
{
local _field=$1 _apath=$2
get_nmcli_value_by_field "$_field" connection show "$_apath"
}
# Get nmcli connection apath (a D-Bus active connection path ) by ifname
#
# apath is used for nmcli connection operations, e.g.
# $ nmcli connection show $apath
get_nmcli_connection_apath_by_ifname()
{
local _ifname=$1
get_nmcli_value_by_field "GENERAL.CON-PATH" device show "$_ifname"
}
get_ifcfg_by_device()
{
grep -E -i -l "^[[:space:]]*DEVICE=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_hwaddr()
{
grep -E -i -l "^[[:space:]]*HWADDR=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_uuid()
{
grep -E -i -l "^[[:space:]]*UUID=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
get_ifcfg_by_name()
{
grep -E -i -l "^[[:space:]]*NAME=\"*${1}\"*[[:space:]]*$" \
/etc/sysconfig/network-scripts/ifcfg-* 2> /dev/null | head -1
}
is_nm_running()
{
[[ "$(LANG=C nmcli -t --fields running general status 2> /dev/null)" == "running" ]]
}
is_nm_handling()
{
LANG=C nmcli -t --fields device,state dev status 2> /dev/null |
grep -q "^\(${1}:connected\)\|\(${1}:connecting.*\)$"
}
# $1: netdev name
get_ifcfg_nmcli()
{
local nm_uuid nm_name
local ifcfg_file
# Get the active nmcli config name of $1
if is_nm_running && is_nm_handling "${1}"; then
# The configuration "uuid" and "name" generated by nm is wrote to
# the ifcfg file as "UUID=<nm_uuid>" and "NAME=<nm_name>".
nm_uuid=$(LANG=C nmcli -t --fields uuid,device c show --active 2> /dev/null |
grep "${1}" | head -1 | cut -d':' -f1)
nm_name=$(LANG=C nmcli -t --fields name,device c show --active 2> /dev/null |
grep "${1}" | head -1 | cut -d':' -f1)
ifcfg_file=$(get_ifcfg_by_uuid "${nm_uuid}")
[[ -z ${ifcfg_file} ]] && ifcfg_file=$(get_ifcfg_by_name "${nm_name}")
fi
echo -n "${ifcfg_file}"
}
# $1: netdev name
get_ifcfg_legacy()
{
local ifcfg_file hwaddr
ifcfg_file="/etc/sysconfig/network-scripts/ifcfg-${1}"
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
ifcfg_file=$(get_ifcfg_by_name "${1}")
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
hwaddr=$(get_hwaddr "${1}")
if [[ -n $hwaddr ]]; then
ifcfg_file=$(get_ifcfg_by_hwaddr "${hwaddr}")
[[ -f ${ifcfg_file} ]] && echo -n "${ifcfg_file}" && return
fi
ifcfg_file=$(get_ifcfg_by_device "${1}")
echo -n "${ifcfg_file}"
}
# $1: netdev name
# Return the ifcfg file whole name(including the path) of $1 if any.
get_ifcfg_filename()
{
local ifcfg_file
ifcfg_file=$(get_ifcfg_nmcli "${1}")
if [[ -z ${ifcfg_file} ]]; then
ifcfg_file=$(get_ifcfg_legacy "${1}")
fi
echo -n "${ifcfg_file}"
}
# returns 0 when omission of a module is desired in dracut_args
# returns 1 otherwise
is_dracut_mod_omitted()
{
local dracut_args dracut_mod=$1
set -- $(kdump_get_conf_val dracut_args)
while [ $# -gt 0 ]; do
case $1 in
-o | --omit)
[[ " ${2//[^[:alnum:]]/ } " == *" $dracut_mod "* ]] && return 0
;;
esac
shift
done
return 1
}
is_wdt_active()
{
local active
[[ -d /sys/class/watchdog ]] || return 1
for dir in /sys/class/watchdog/*; do
[[ -f "$dir/state" ]] || continue
active=$(< "$dir/state")
[[ $active == "active" ]] && return 0
done
return 1
}
Set zstd as the default compression method for kdump initrd resolves: bz1896698 upstream: fedora conflict: none commit 0311f6e25bec9994de2c6b156b5a568583aa8aae Author: Tao Liu <ltao@redhat.com> Date: Wed Jan 5 17:42:12 2022 +0800 Set zstd as the default compression method for kdump initrd zstd has better compression ratio and time consumption balance. When no customized compression method specified in kdump.conf, we will use zstd as the default compression method. **The test method: I installed kexec-tools with and without the patch, executing the following command for 4 times, and calculate the averange time: $ rm -f /boot/initramfs-*kdump.img && time kdumpctl rebuild && \ ls -ail /boot/initramfs-*kdump.img **The test result: Bare metal x86_64 machine: dracut with squash module zlib lzo xz lz4 zstd real 10.6282 11.0398 11.395 8.6424 10.1676 user 9.8932 11.9072 14.2304 2.8286 8.6468 sys 3.523 3.4626 3.6028 3.5 3.4942 size of kdump.img 30575616 31419392 27102208 36666368 29236224 dracut without squash module zlib lzo xz lz4 zstd real 9.509 19.4876 11.6724 9.0338 10.267 user 10.6028 14.516 17.8662 4.0476 9.0936 sys 2.942 2.9184 3.0662 2.9232 3.0662 size of kdump.img 19247949 19958120 14505056 21112544 17007764 PowerVM hosted ppc64le VM: dracut with squash module | dracut without sqaush module zlib zstd | zlib zstd real 10.6742 10.7572 | 9.7676 10.5722 user 18.754 19.8338 | 20.7932 13.179 sys 1.8358 1.864 | 1.637 1.663 | size of | kdump.img 36917248 35467264 | 21441323 19007108 **discussion zstd has a better compression ratio and time consumption balance. Acked-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2022-01-06 06:23:41 +00:00
have_compression_in_dracut_args()
{
Seperate dracut and dracut-squash compressor for zstd Upstream: fedora Resolves: bz2045949 Resolves: bz2044804 Conflict: none commit fc1c79ffd21e7bcb3a710368cd7023c9a634258e Author: Tao Liu <ltao@redhat.com> Date: Sat Oct 8 12:09:08 2022 +0800 Seperate dracut and dracut-squash compressor for zstd Previously kexec-tools will pass "--compress zstd" to dracut. It will make dracut to decide whether: a) call mksquashfs to make a zstd format squash-root.img, b) call cmd zstd to make a initramfs. Since dracut(>= 057) has decoupled the compressor for dracut and dracut-squash, So in this patch, we will pass the compressor seperately. Note: The is_squash_available && !dracut_has_option --squash-compressor && !is_zsdt_command_available case is left unprocessed on purpose. Actually, the situation when we want to call zstd compression is: 1) If squash function OK, we want dracut to invoke mksquashfs to make a zstd format squash-root.img within initramfs. 2) If squash function is not OK, and cmd zstd presents, we want dracut to invoke cmd zstd to make a zstd format initramfs. is_zstd_command_available check can handle case 2 completely. However, for the is_squash_available check, it cannot handle case 1 completely. It only checks if the kernel supports squashfs, it doesn't check whether the squash module has been added by dracut when making initramfs. In fact, in kexec-tools we are unable to do the check, there are multiple ways to forbit dracut to load a module, such as "dracut -o module" and "omit_dracutmodules in dracut.conf". When squash dracut module is omitted, is_squash_available check will still pass, so "--compress zstd" will be appended to dracut cmdline, and it will call cmd zstd to do the compression. However cmd zstd may not exist, so it fails. The previous "--compress zstd" is ambiguous, after the intro of "--squash-compressor", "--squash-compressor" only effect for mksquashfs and "--compress" only effect for specific cmd. So for the is_squash_available && !dracut_has_option --squash-compressor && !is_zsdt_command_available case, we just leave it to be handled the default way. Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2022-10-26 02:16:16 +00:00
[[ "$(kdump_get_conf_val dracut_args)" =~ (^|[[:space:]])--(gzip|bzip2|lzma|xz|lzo|lz4|zstd|no-compress|compress|squash-compressor)([[:space:]]|$) ]]
Set zstd as the default compression method for kdump initrd resolves: bz1896698 upstream: fedora conflict: none commit 0311f6e25bec9994de2c6b156b5a568583aa8aae Author: Tao Liu <ltao@redhat.com> Date: Wed Jan 5 17:42:12 2022 +0800 Set zstd as the default compression method for kdump initrd zstd has better compression ratio and time consumption balance. When no customized compression method specified in kdump.conf, we will use zstd as the default compression method. **The test method: I installed kexec-tools with and without the patch, executing the following command for 4 times, and calculate the averange time: $ rm -f /boot/initramfs-*kdump.img && time kdumpctl rebuild && \ ls -ail /boot/initramfs-*kdump.img **The test result: Bare metal x86_64 machine: dracut with squash module zlib lzo xz lz4 zstd real 10.6282 11.0398 11.395 8.6424 10.1676 user 9.8932 11.9072 14.2304 2.8286 8.6468 sys 3.523 3.4626 3.6028 3.5 3.4942 size of kdump.img 30575616 31419392 27102208 36666368 29236224 dracut without squash module zlib lzo xz lz4 zstd real 9.509 19.4876 11.6724 9.0338 10.267 user 10.6028 14.516 17.8662 4.0476 9.0936 sys 2.942 2.9184 3.0662 2.9232 3.0662 size of kdump.img 19247949 19958120 14505056 21112544 17007764 PowerVM hosted ppc64le VM: dracut with squash module | dracut without sqaush module zlib zstd | zlib zstd real 10.6742 10.7572 | 9.7676 10.5722 user 18.754 19.8338 | 20.7932 13.179 sys 1.8358 1.864 | 1.637 1.663 | size of | kdump.img 36917248 35467264 | 21441323 19007108 **discussion zstd has a better compression ratio and time consumption balance. Acked-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2022-01-06 06:23:41 +00:00
}
# If "dracut_args" contains "--mount" information, use it
# directly without any check(users are expected to ensure
# its correctness).
is_mount_in_dracut_args()
{
[[ " $(kdump_get_conf_val dracut_args)" =~ .*[[:space:]]--mount[=[:space:]].* ]]
}
Introduce a function to get reserved memory size Resolves: bz2232499 Upstream: Fedora Rawhide Conflict: None commit 4b7b7736eef8c682a8af07081d7f06eeb774172f Author: Sourabh Jain <sourabhjain@linux.ibm.com> Date: Wed Aug 2 20:36:48 2023 +0530 Introduce a function to get reserved memory size The size of the reserved memory in the functions show_reserved_mem, check_crash_mem_reserved, and do_estimate are fetched from the sysfs node `/sys/kernel/kexec_crash_size`. However, in the case of fadump, the reserved area size is instead present in /sys/kernel/fadump/mem_reserved. For example: $ kdumpctl showmem kdump: Dump mode is fadump kdump: Reserved 0MB memory for crash kernel The above command showed 0MB for Reserved memory which is incorrect, the actual reservation was 2048MB. To resolve this issue a new helper function is introduced to fetch reserved memory size based on the dump mode. For "fadump" mode, it looks in `/sys/kernel/fadump/mem_reserved`, otherwise, it uses `/sys/kernel/kexec_crash_size`. And all functions that previously fetching reserved memory directly from `/sys/kernel/kexec_crash_size` sysfs node are now updated to use this new function to get the reserved memory size. With the fix in place, the `kdumpctl showmem` command will now display correct reserved memory size. $ kdumpctl showmem kdump: Dump mode is fadump kdump: Reserved 2048MB memory for crash kernel Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com> Reported-by: Sachin P Bappalige <sachinpb@linux.vnet.ibm.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Lichen Liu <lichliu@redhat.com>
2023-09-21 08:24:28 +00:00
get_reserved_mem_size()
{
local reserved_mem_size=0
if is_fadump_capable; then
reserved_mem_size=$(< /sys/kernel/fadump/mem_reserved)
else
reserved_mem_size=$(< /sys/kernel/kexec_crash_size)
fi
echo "$reserved_mem_size"
}
check_crash_mem_reserved()
{
local mem_reserved
Introduce a function to get reserved memory size Resolves: bz2232499 Upstream: Fedora Rawhide Conflict: None commit 4b7b7736eef8c682a8af07081d7f06eeb774172f Author: Sourabh Jain <sourabhjain@linux.ibm.com> Date: Wed Aug 2 20:36:48 2023 +0530 Introduce a function to get reserved memory size The size of the reserved memory in the functions show_reserved_mem, check_crash_mem_reserved, and do_estimate are fetched from the sysfs node `/sys/kernel/kexec_crash_size`. However, in the case of fadump, the reserved area size is instead present in /sys/kernel/fadump/mem_reserved. For example: $ kdumpctl showmem kdump: Dump mode is fadump kdump: Reserved 0MB memory for crash kernel The above command showed 0MB for Reserved memory which is incorrect, the actual reservation was 2048MB. To resolve this issue a new helper function is introduced to fetch reserved memory size based on the dump mode. For "fadump" mode, it looks in `/sys/kernel/fadump/mem_reserved`, otherwise, it uses `/sys/kernel/kexec_crash_size`. And all functions that previously fetching reserved memory directly from `/sys/kernel/kexec_crash_size` sysfs node are now updated to use this new function to get the reserved memory size. With the fix in place, the `kdumpctl showmem` command will now display correct reserved memory size. $ kdumpctl showmem kdump: Dump mode is fadump kdump: Reserved 2048MB memory for crash kernel Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com> Reported-by: Sachin P Bappalige <sachinpb@linux.vnet.ibm.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Lichen Liu <lichliu@redhat.com>
2023-09-21 08:24:28 +00:00
mem_reserved=$(get_reserved_mem_size)
if [[ $mem_reserved -eq 0 ]]; then
derror "No memory reserved for crash kernel"
return 1
fi
return 0
}
check_kdump_feasibility()
{
if [[ ! -e /sys/kernel/kexec_crash_loaded ]]; then
derror "Kdump is not supported on this kernel"
return 1
fi
check_crash_mem_reserved
return $?
}
is_kernel_loaded()
{
local _sysfs _mode
_mode=$1
case "$_mode" in
kdump)
_sysfs="/sys/kernel/kexec_crash_loaded"
;;
fadump)
_sysfs="$FADUMP_REGISTER_SYS_NODE"
;;
*)
derror "Unknown dump mode '$_mode' provided"
return 1
;;
esac
if [[ ! -f $_sysfs ]]; then
derror "$_mode is not supported on this kernel"
return 1
fi
[[ $(< $_sysfs) -eq 1 ]]
}
#
# This function returns the "apicid" of the boot
# cpu (cpu 0) if present.
#
get_bootcpu_apicid()
{
awk ' \
BEGIN { CPU = "-1"; } \
$1=="processor" && $2==":" { CPU = $NF; } \
CPU=="0" && /^apicid/ { print $NF; } \
' \
/proc/cpuinfo
}
# This function check iomem and determines if we have more than
# 4GB of ram available. Returns 1 if we do, 0 if we dont
need_64bit_headers()
{
return "$(tail -n 1 /proc/iomem | awk '{ split ($1, r, "-");
print (strtonum("0x" r[2]) > strtonum("0xffffffff")); }')"
}
# Check if secure boot is being enforced.
#
# Per Peter Jones, we need check efivar SecureBoot-$(the UUID) and
# SetupMode-$(the UUID), they are both 5 bytes binary data. The first four
# bytes are the attributes associated with the variable and can safely be
# ignored, the last bytes are one-byte true-or-false variables. If SecureBoot
# is 1 and SetupMode is 0, then secure boot is being enforced.
#
# Assume efivars is mounted at /sys/firmware/efi/efivars.
is_secure_boot_enforced()
{
local secure_boot_file setup_mode_file
local secure_boot_byte setup_mode_byte
# On powerpc, secure boot is enforced if:
# host secure boot: /ibm,secure-boot/os-secureboot-enforcing DT property exists
# guest secure boot: /ibm,secure-boot >= 2
if [[ -f /proc/device-tree/ibm,secureboot/os-secureboot-enforcing ]]; then
return 0
fi
if [[ -f /proc/device-tree/ibm,secure-boot ]] &&
[[ $(lsprop /proc/device-tree/ibm,secure-boot | tail -1) -ge 2 ]]; then
return 0
fi
# Detect secure boot on x86 and arm64
secure_boot_file=$(find /sys/firmware/efi/efivars -name "SecureBoot-*" 2> /dev/null)
setup_mode_file=$(find /sys/firmware/efi/efivars -name "SetupMode-*" 2> /dev/null)
if [[ -f $secure_boot_file ]] && [[ -f $setup_mode_file ]]; then
secure_boot_byte=$(hexdump -v -e '/1 "%d\ "' "$secure_boot_file" | cut -d' ' -f 5)
setup_mode_byte=$(hexdump -v -e '/1 "%d\ "' "$setup_mode_file" | cut -d' ' -f 5)
if [[ $secure_boot_byte == "1" ]] && [[ $setup_mode_byte == "0" ]]; then
return 0
fi
fi
# Detect secure boot on s390x
if [[ -e "/sys/firmware/ipl/secure" && "$(< /sys/firmware/ipl/secure)" == "1" ]]; then
return 0
fi
return 1
}
#
# prepare_kexec_args <kexec args>
# This function prepares kexec argument.
#
prepare_kexec_args()
{
local kexec_args=$1
local found_elf_args
ARCH=$(uname -m)
if [[ $ARCH == "i686" ]] || [[ $ARCH == "i386" ]]; then
need_64bit_headers
if [[ $? == 1 ]]; then
found_elf_args=$(echo "$kexec_args" | grep elf32-core-headers)
if [[ -n $found_elf_args ]]; then
dwarn "Warning: elf32-core-headers overrides correct elf64 setting"
else
kexec_args="$kexec_args --elf64-core-headers"
fi
else
found_elf_args=$(echo "$kexec_args" | grep elf64-core-headers)
if [[ -z $found_elf_args ]]; then
kexec_args="$kexec_args --elf32-core-headers"
fi
fi
fi
# For secureboot enabled machines, use new kexec file based syscall.
# Old syscall will always fail as it does not have capability to do
# kernel signature verification.
if is_secure_boot_enforced; then
dinfo "Secure Boot is enabled. Using kexec file based syscall."
kexec_args="$kexec_args -s"
fi
echo "$kexec_args"
}
# prepare_kdump_kernel <kdump_kernelver>
# This function return kdump_kernel given a kernel version.
prepare_kdump_kernel()
{
local kdump_kernelver=$1
local dir img boot_dirlist boot_imglist kdump_kernel machine_id
read -r machine_id < /etc/machine-id
boot_dirlist=${KDUMP_BOOTDIR:-"/boot /boot/efi /efi /"}
kdumpctl: Add basic UKI support Resolves: bz2169720 Upstream: src.fedoraproject.org/rpms/kexec-tools.git Conflicts: Small context difference in kexec-tools.spec commit ea7be0608ed719cc1cb134ecf6ef51a4b7e9f104 Author: Philipp Rudo <prudo@redhat.com> Date: Fri May 5 17:14:42 2023 +0200 kdumpctl: Add basic UKI support A Unified Kernel Image (UKI) is a single EFI PE executable combining an EFI stub, a kernel image, an initrd image, and the kernel command line. They are defined in the Boot Loader Specification [1] as type #2 entries. UKIs have the advantage that all code as well as meta data that is required to boot the system, not only the kernel image, is combined in a single PE file and can be signed for EFI SecureBoot. This extends the coverage of SecureBoot extensively. For RHEL support for UKI were included into kernel-ark with 16c7e3ee836e ("redhat: Add sub-RPM with a EFI unified kernel image for virtual machines"). There are two problems with UKIs from the kdump point of view at the moment. First, they cannot be directly loaded via kexec_file_load and second, the initrd included isn't suitable for kdump. In order to enable kdump on systems with UKIs build the kdump initrd as usual and extract the kernel image before loading the crash kernel. [1] https://uapi-group.org/specifications/specs/boot_loader_specification/ Signed-off-by: Philipp Rudo <prudo@redhat.com> Reviewed-by: Pingfan Liu <piliu@redhat.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2023-05-31 07:11:31 +00:00
boot_imglist="$KDUMP_IMG-$kdump_kernelver$KDUMP_IMG_EXT \
$machine_id/$kdump_kernelver/$KDUMP_IMG \
EFI/Linux/$machine_id-$kdump_kernelver.efi"
# The kernel of OSTree based systems is not in the standard locations.
if is_ostree; then
boot_dirlist="$(echo /boot/ostree/*) $boot_dirlist"
fi
# Use BOOT_IMAGE as reference if possible, strip the GRUB root device prefix in (hd0,gpt1) format
kdump-lib: attempt to fix BOOT_IMAGE detection Resolves: bz2174836 Upstream: Fedora Conflict: None commit f9c32372d2c8d5e58024d2ddc0b70498c696b5d8 Author: Dusty Mabe <dusty@dustymabe.com> Date: Wed Jun 22 12:34:12 2022 -0400 kdump-lib: attempt to fix BOOT_IMAGE detection Currently $boot_img can get bad data if running on a platform that doesn't set BOOT_IMAGE in the kernel command line. For example, currently: - s390x Fedora CoreOS machine: ``` [root@cosa-devsh ~]# sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\) .*/\2/" /proc/cmdline mitigations=auto,nosmt ignition.platform.id=qemu ostree=/ostree/boot.0/fedora-coreos/2a72567ac8f7ed678c3ac89408f795e6ccd4e97b41e14af5f471b6a807e858b9/0 root=UUID=2a88436a-3b6b-4706-b33a-b8270bd87cde rw rootflags=prjquota boot=UUID=f4b2eaa5-9317-4798-85cf-308c477fee4c crashkernel=600M ``` where on a platform that uses GRUB we get: - x86_64 Fedora CoreOS machine: ``` [root@cosa-devsh ~]# sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\) .*/\2/" /proc/cmdline /ostree/fedora-coreos-af4f6cc7b9ff486cfa647680b180e989c72c8eed03a34a42e7328e49332bd20e/vmlinuz-5.18.5-200.fc36.x86_64 ``` We should change the setting of the boot_img variable such that it will be empty if BOOT_IMAGE doesn't exist. With this change on the s390x machine: ``` [root@cosa-devsh ~]# grep -P -o '^BOOT_IMAGE=(\S+)' /proc/cmdline | sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\)/\2/" [root@cosa-devsh ~]# ``` This change mattered much more before the change in c5bdd2d which changed the following line from [[ -n $boot_img ]] to [[ "$boot_img" == *"$kdump_kernelver" ]]. Still I think this change has merit. Signed-off-by: Dusty Mabe <dusty@dustymabe.com> Acked-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Lichen Liu <lichliu@redhat.com>
2023-03-07 02:38:13 +00:00
boot_img="$(grep -P -o '^BOOT_IMAGE=(\S+)' /proc/cmdline | sed "s/^BOOT_IMAGE=\((\S*)\)\?\(\S*\)/\2/")"
if [[ "$boot_img" == *"$kdump_kernelver" ]]; then
boot_imglist="$boot_img $boot_imglist"
fi
for dir in $boot_dirlist; do
for img in $boot_imglist; do
if [[ -f "$dir/$img" ]]; then
kdump_kernel=$(echo "$dir/$img" | tr -s '/')
break 2
fi
done
done
echo "$kdump_kernel"
}
_is_valid_kver()
{
[[ -f /usr/lib/modules/$1/modules.dep ]]
}
# This function is introduced since 64k variant may be installed on 4k or vice versa
# $1 the kernel path name.
parse_kver_from_path()
{
local _img _kver
[[ -z "$1" ]] && return
_img=$1
BLS_ENTRY_TOKEN=$(</etc/machine-id)
# Fedora standard installation, i.e. $BOOT/vmlinuz-<version>
_kver=${_img##*/vmlinuz-}
_kver=${_kver%"$KDUMP_IMG_EXT"}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
# BLS recommended image names, i.e. $BOOT/<token>/<version>/linux
_kver=${_img##*/"$BLS_ENTRY_TOKEN"/}
_kver=${_kver%%/*}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
# Fedora UKI installation, i.e. $BOOT/efi/EFI/Linux/<token>-<version>.efi
_kver=${_img##*/"$BLS_ENTRY_TOKEN"-}
_kver=${_kver%.efi}
if _is_valid_kver "$_kver"; then
echo "$_kver"
return
fi
ddebug "Could not parse version from $_img"
}
_get_kdump_kernel_version()
{
local _version _version_nondebug
if [[ -n "$KDUMP_KERNELVER" ]]; then
echo "$KDUMP_KERNELVER"
return
fi
_version=$(uname -r)
if [[ ! "$_version" =~ [+|-]debug$ ]]; then
echo "$_version"
return
fi
_version_nondebug=${_version%+debug}
_version_nondebug=${_version_nondebug%-debug}
if [[ -f "$(prepare_kdump_kernel "$_version_nondebug")" ]]; then
dinfo "Use of debug kernel detected. Trying to use $_version_nondebug"
echo "$_version_nondebug"
else
dinfo "Use of debug kernel detected but cannot find $_version_nondebug. Falling back to $_version"
echo "$_version"
fi
}
#
# Detect initrd and kernel location, results are stored in global environmental variables:
# KDUMP_BOOTDIR, KDUMP_KERNELVER, KDUMP_KERNEL, DEFAULT_INITRD, and KDUMP_INITRD
#
# Expectes KDUMP_BOOTDIR, KDUMP_IMG, KDUMP_IMG_EXT, KDUMP_KERNELVER to be loaded from config already
# and will prefer already set values so user can specify custom kernel/initramfs location
#
prepare_kdump_bootinfo()
{
local boot_initrdlist default_initrd_base var_target_initrd_dir
KDUMP_KERNELVER=$(_get_kdump_kernel_version)
KDUMP_KERNEL=$(prepare_kdump_kernel "$KDUMP_KERNELVER")
if ! [[ -e $KDUMP_KERNEL ]]; then
derror "Failed to detect kdump kernel location"
return 1
fi
# For 64k variant, e.g. vmlinuz-5.14.0-327.el9.aarch64+64k-debug
if [[ "$KDUMP_KERNEL" == *"+debug" || "$KDUMP_KERNEL" == *"64k-debug" ]]; then
dwarn "Using debug kernel, you may need to set a larger crashkernel than the default value."
fi
# Set KDUMP_BOOTDIR to where kernel image is stored
kdumpctl: Add basic UKI support Resolves: bz2169720 Upstream: src.fedoraproject.org/rpms/kexec-tools.git Conflicts: Small context difference in kexec-tools.spec commit ea7be0608ed719cc1cb134ecf6ef51a4b7e9f104 Author: Philipp Rudo <prudo@redhat.com> Date: Fri May 5 17:14:42 2023 +0200 kdumpctl: Add basic UKI support A Unified Kernel Image (UKI) is a single EFI PE executable combining an EFI stub, a kernel image, an initrd image, and the kernel command line. They are defined in the Boot Loader Specification [1] as type #2 entries. UKIs have the advantage that all code as well as meta data that is required to boot the system, not only the kernel image, is combined in a single PE file and can be signed for EFI SecureBoot. This extends the coverage of SecureBoot extensively. For RHEL support for UKI were included into kernel-ark with 16c7e3ee836e ("redhat: Add sub-RPM with a EFI unified kernel image for virtual machines"). There are two problems with UKIs from the kdump point of view at the moment. First, they cannot be directly loaded via kexec_file_load and second, the initrd included isn't suitable for kdump. In order to enable kdump on systems with UKIs build the kdump initrd as usual and extract the kernel image before loading the crash kernel. [1] https://uapi-group.org/specifications/specs/boot_loader_specification/ Signed-off-by: Philipp Rudo <prudo@redhat.com> Reviewed-by: Pingfan Liu <piliu@redhat.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2023-05-31 07:11:31 +00:00
if is_uki "$KDUMP_KERNEL"; then
KDUMP_BOOTDIR=/boot
else
KDUMP_BOOTDIR=$(dirname "$KDUMP_KERNEL")
fi
# Default initrd should just stay aside of kernel image, try to find it in KDUMP_BOOTDIR
boot_initrdlist="initramfs-$KDUMP_KERNELVER.img initrd"
for initrd in $boot_initrdlist; do
if [[ -f "$KDUMP_BOOTDIR/$initrd" ]]; then
default_initrd_base="$initrd"
DEFAULT_INITRD="$KDUMP_BOOTDIR/$default_initrd_base"
break
fi
done
# Create kdump initrd basename from default initrd basename
# initramfs-5.7.9-200.fc32.x86_64.img => initramfs-5.7.9-200.fc32.x86_64kdump.img
# initrd => initrdkdump
if [[ -z $default_initrd_base ]]; then
kdump_initrd_base=initramfs-${KDUMP_KERNELVER}kdump.img
elif [[ $default_initrd_base == *.* ]]; then
kdump_initrd_base=${default_initrd_base%.*}kdump.${DEFAULT_INITRD##*.}
else
kdump_initrd_base=${default_initrd_base}kdump
fi
# Place kdump initrd in $(/var/lib/kdump) if $(KDUMP_BOOTDIR) not writable
if [[ ! -w $KDUMP_BOOTDIR ]]; then
var_target_initrd_dir="/var/lib/kdump"
mkdir -p "$var_target_initrd_dir"
KDUMP_INITRD="$var_target_initrd_dir/$kdump_initrd_base"
else
KDUMP_INITRD="$KDUMP_BOOTDIR/$kdump_initrd_base"
fi
}
get_watchdog_drvs()
{
local _wdtdrvs _drv _dir
for _dir in /sys/class/watchdog/*; do
# device/modalias will return driver of this device
[[ -f "$_dir/device/modalias" ]] || continue
_drv=$(< "$_dir/device/modalias")
_drv=$(modprobe --set-version "$KDUMP_KERNELVER" -R "$_drv" 2> /dev/null)
for i in $_drv; do
if ! [[ " $_wdtdrvs " == *" $i "* ]]; then
_wdtdrvs="$_wdtdrvs $i"
fi
done
done
echo "$_wdtdrvs"
}
_cmdline_parse()
{
local opt val
while read -r opt; do
if [[ $opt =~ = ]]; then
val=${opt#*=}
opt=${opt%%=*}
# ignore options like 'foo='
[[ -z $val ]] && continue
# xargs removes quotes, add them again
[[ $val =~ [[:space:]] ]] && val="\"$val\""
else
val=""
fi
echo "$opt $val"
done <<< "$(echo "$1" | xargs -n 1 echo)"
}
#
# prepare_cmdline <commandline> <commandline remove> <commandline append>
# This function performs a series of edits on the command line.
# Store the final result in global $KDUMP_COMMANDLINE.
prepare_cmdline()
{
local in out append opt val id drv
local -A remove
in=${1:-$(< /proc/cmdline)}
while read -r opt val; do
[[ -n "$opt" ]] || continue
remove[$opt]=1
done <<< "$(_cmdline_parse "$2")"
append=$3
# These params should always be removed
remove[crashkernel]=1
remove[panic_on_warn]=1
# Always remove "root=X", as we now explicitly generate all kinds
# of dump target mount information including root fs.
#
# We do this before KDUMP_COMMANDLINE_APPEND, if one really cares
# about it(e.g. for debug purpose), then can pass "root=X" using
# KDUMP_COMMANDLINE_APPEND.
remove[root]=1
# With the help of "--hostonly-cmdline", we can avoid some interitage.
remove[rd.lvm.lv]=1
remove[rd.luks.uuid]=1
remove[rd.dm.uuid]=1
remove[rd.md.uuid]=1
remove[fcoe]=1
# Remove netroot, rd.iscsi.initiator and iscsi_initiator since
# we get duplicate entries for the same in case iscsi code adds
# it as well.
remove[netroot]=1
remove[rd.iscsi.initiator]=1
remove[iscsi_initiator]=1
while read -r opt val; do
[[ -n "$opt" ]] || continue
[[ -n "${remove[$opt]}" ]] && continue
if [[ -n "$val" ]]; then
out+="$opt=$val "
else
out+="$opt "
fi
done <<< "$(_cmdline_parse "$in")"
out+="$append "
id=$(get_bootcpu_apicid)
if [[ -n "${id}" ]]; then
out+="disable_cpu_apicid=$id "
fi
# If any watchdog is used, set it's pretimeout to 0. pretimeout let
# watchdog panic the kernel first, and reset the system after the
# panic. If the system is already in kdump, panic is not helpful
# and only increase the chance of watchdog failure.
for drv in $(get_watchdog_drvs); do
out+="$drv.pretimeout=0 "
if [[ $drv == hpwdt ]]; then
# hpwdt have a special parameter kdumptimeout, it is
# only supposed to be set to non-zero in first kernel.
# In kdump, non-zero value could prevent the watchdog
# from resetting the system.
out+="$drv.kdumptimeout=0 "
fi
done
kdumpctl: Add basic UKI support Resolves: bz2169720 Upstream: src.fedoraproject.org/rpms/kexec-tools.git Conflicts: Small context difference in kexec-tools.spec commit ea7be0608ed719cc1cb134ecf6ef51a4b7e9f104 Author: Philipp Rudo <prudo@redhat.com> Date: Fri May 5 17:14:42 2023 +0200 kdumpctl: Add basic UKI support A Unified Kernel Image (UKI) is a single EFI PE executable combining an EFI stub, a kernel image, an initrd image, and the kernel command line. They are defined in the Boot Loader Specification [1] as type #2 entries. UKIs have the advantage that all code as well as meta data that is required to boot the system, not only the kernel image, is combined in a single PE file and can be signed for EFI SecureBoot. This extends the coverage of SecureBoot extensively. For RHEL support for UKI were included into kernel-ark with 16c7e3ee836e ("redhat: Add sub-RPM with a EFI unified kernel image for virtual machines"). There are two problems with UKIs from the kdump point of view at the moment. First, they cannot be directly loaded via kexec_file_load and second, the initrd included isn't suitable for kdump. In order to enable kdump on systems with UKIs build the kdump initrd as usual and extract the kernel image before loading the crash kernel. [1] https://uapi-group.org/specifications/specs/boot_loader_specification/ Signed-off-by: Philipp Rudo <prudo@redhat.com> Reviewed-by: Pingfan Liu <piliu@redhat.com> Reviewed-by: Coiby Xu <coxu@redhat.com> Signed-off-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2023-05-31 07:11:31 +00:00
# Always disable gpt-auto-generator as it hangs during boot of the
# crash kernel. Furthermore we know which disk will be used for dumping
# (if at all) and add it explicitly.
is_uki "$KDUMP_KERNEL" && out+="rd.systemd.gpt_auto=no "
# Trim unnecessary whitespaces
echo "$out" | sed -e "s/^ *//g" -e "s/ *$//g" -e "s/ \+/ /g"
}
PROC_IOMEM=/proc/iomem
#get system memory size i.e. memblock.memory.total_size in the unit of GB
get_system_size()
{
sum=$(sed -n "s/\s*\([0-9a-fA-F]\+\)-\([0-9a-fA-F]\+\) : System RAM$/+ 0x\2 - 0x\1 + 1/p" $PROC_IOMEM)
echo $(( (sum) / 1024 / 1024 / 1024))
}
# Return the recommended size for the reserved crashkernel memory
# depending on the system memory size.
#
# This functions is expected to be consistent with the parse_crashkernel_mem()
# in kernel i.e. how kernel allocates the kdump memory given the crashkernel
# parameter crashkernel=range1:size1[,range2:size2,…] and the system memory
# size.
get_recommend_size()
{
local mem_size=$1
local _ck_cmdline=$2
local range start start_unit end end_unit size
while read -r -d , range; do
# need to use non-default IFS as double spaces are used as a
# single delimiter while commas aren't...
IFS=, read start start_unit end end_unit size <<< \
"$(echo "$range" | sed -n "s/\([0-9]\+\)\([GT]\?\)-\([0-9]*\)\([GT]\?\):\([0-9]\+[MG]\)/\1,\2,\3,\4,\5/p")"
# aka. 102400T
end=${end:-104857600}
[[ "$end_unit" == T ]] && end=$((end * 1024))
[[ "$start_unit" == T ]] && start=$((start * 1024))
if [[ $mem_size -ge $start ]] && [[ $mem_size -lt $end ]]; then
echo "$size"
return
fi
# append a ',' as read expects the 'file' to end with a delimiter
done <<< "$_ck_cmdline,"
# no matching range found
echo "0M"
}
has_mlx5()
{
[[ -d /sys/bus/pci/drivers/mlx5_core ]]
}
has_aarch64_smmu()
{
ls /sys/devices/platform/arm-smmu-* 1> /dev/null 2>&1
}
is_memsize() { [[ "$1" =~ ^[+-]?[0-9]+[KkMmGgTtPbEe]?$ ]]; }
# range defined for crashkernel parameter
# i.e. <start>-[<end>]
is_memrange()
{
is_memsize "${1%-*}" || return 1
[[ -n ${1#*-} ]] || return 0
is_memsize "${1#*-}"
}
to_bytes()
{
local _s
_s="$1"
is_memsize "$_s" || return 1
case "${_s: -1}" in
K|k)
_s=${_s::-1}
_s="$((_s * 1024))"
;;
M|m)
_s=${_s::-1}
_s="$((_s * 1024 * 1024))"
;;
G|g)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024))"
;;
T|t)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024))"
;;
P|p)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024))"
;;
E|e)
_s=${_s::-1}
_s="$((_s * 1024 * 1024 * 1024 * 1024 * 1024 * 1024))"
;;
*)
;;
esac
echo "$_s"
}
memsize_add()
{
local -a units=("" "K" "M" "G" "T" "P" "E")
local i a b
a=$(to_bytes "$1") || return 1
b=$(to_bytes "$2") || return 1
i=0
(( a += b ))
while :; do
[[ $(( a / 1024 )) -eq 0 ]] && break
[[ $(( a % 1024 )) -ne 0 ]] && break
[[ $(( ${#units[@]} - 1 )) -eq $i ]] && break
(( a /= 1024 ))
(( i += 1 ))
done
echo "${a}${units[$i]}"
}
_crashkernel_parse()
{
local ck entry
local range size offset
ck="$1"
if [[ "$ck" == *@* ]]; then
offset="@${ck##*@}"
ck=${ck%@*}
elif [[ "$ck" == *,high ]] || [[ "$ck" == *,low ]]; then
offset=",${ck##*,}"
ck=${ck%,*}
else
offset=''
fi
while read -d , -r entry; do
[[ -n "$entry" ]] || continue
if [[ "$entry" == *:* ]]; then
range=${entry%:*}
size=${entry#*:}
else
range=""
size=${entry}
fi
echo "$size;$range;"
done <<< "$ck,"
echo ";;$offset"
}
# $1 crashkernel command line parameter
# $2 size to be added
_crashkernel_add()
{
local ck delta ret
local range size offset
ck="$1"
delta="$2"
ret=""
while IFS=';' read -r size range offset; do
if [[ -n "$offset" ]]; then
ret="${ret%,}$offset"
break
fi
[[ -n "$size" ]] || continue
if [[ -n "$range" ]]; then
is_memrange "$range" || return 1
ret+="$range:"
fi
size=$(memsize_add "$size" "$delta") || return 1
ret+="$size,"
done < <( _crashkernel_parse "$ck")
echo "${ret%,}"
}
# get default crashkernel
# $1 dump mode, if not specified, dump_mode will be judged by is_fadump_capable
# $2 kernel-release, if not specified, got by _get_kdump_kernel_version
kdump_get_arch_recommend_crashkernel()
{
local _arch _ck_cmdline _dump_mode
kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active Resolves: https://issues.redhat.com/browse/RHEL-8727 Resolves: https://issues.redhat.com/browse/RHEL-8710 Upstream: Fedora Conflict: The 1st hunk is merged manualy because of conflict caused by context. commit 4841bc6a6dfe4d09640322f6703f60dbfe9f6f19 Author: Baoquan He <bhe@redhat.com> Date: Thu Sep 21 18:01:02 2023 +0800 kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active It's reported that kdump kernel failed to boot and can't dump vmcore when crashkernel=192M and SME/SEV is active. This is because swiotlb will be enabled and reserves 64M memory by default on system with SME/SEV enabled. Then kdump kernel will be out of memory after taking 64M away for swiotlb init. So here add extra 64M memory to default crashkernel value so that kdump kernel can function well as before. When doing that, search journalctl for the "Memory Encryption Features active: AMD" to check if SME or SEV is active. This line of log is printed out in kernel function as below and the type SME is mutual exclusive with type SEV. ***: arch/x86/mm/mem_encrypt.c:print_mem_encrypt_feature_info() Note: 1) The conditional check is relying on journalctl log because I didn't find available system interface to check if SEV is active. Even though we can check if SME is active via /proc/cpuinfo. For consistency, I take the same check for both SME and SEV by searching journalctl. 2) The conditional check is relying on journalctl log, means it won't work for crashkernel setting in anoconda because the installation kernel doesn't have the SME/SEV setting. So customer need manually run 'kdumpctl reset-crashkernel' to reset crashkernel to add the extra 64M after OS installation. 3) We need watch the line of log printing in print_mem_encrypt_feature_info() in kernel just in case people may change it in the future. Signed-off-by: Baoquan He <bhe@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Baoquan He <bhe@redhat.com>
2023-09-21 10:01:02 +00:00
local _delta=0
if [[ -z "$1" ]]; then
if is_fadump_capable; then
_dump_mode=fadump
else
_dump_mode=kdump
fi
else
_dump_mode=$1
fi
_arch=$(uname -m)
if [[ $_arch == "x86_64" ]] || [[ $_arch == "s390x" ]]; then
_ck_cmdline="1G-4G:192M,4G-64G:256M,64G-:512M"
kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active Resolves: https://issues.redhat.com/browse/RHEL-8727 Resolves: https://issues.redhat.com/browse/RHEL-8710 Upstream: Fedora Conflict: The 1st hunk is merged manualy because of conflict caused by context. commit 4841bc6a6dfe4d09640322f6703f60dbfe9f6f19 Author: Baoquan He <bhe@redhat.com> Date: Thu Sep 21 18:01:02 2023 +0800 kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active It's reported that kdump kernel failed to boot and can't dump vmcore when crashkernel=192M and SME/SEV is active. This is because swiotlb will be enabled and reserves 64M memory by default on system with SME/SEV enabled. Then kdump kernel will be out of memory after taking 64M away for swiotlb init. So here add extra 64M memory to default crashkernel value so that kdump kernel can function well as before. When doing that, search journalctl for the "Memory Encryption Features active: AMD" to check if SME or SEV is active. This line of log is printed out in kernel function as below and the type SME is mutual exclusive with type SEV. ***: arch/x86/mm/mem_encrypt.c:print_mem_encrypt_feature_info() Note: 1) The conditional check is relying on journalctl log because I didn't find available system interface to check if SEV is active. Even though we can check if SME is active via /proc/cpuinfo. For consistency, I take the same check for both SME and SEV by searching journalctl. 2) The conditional check is relying on journalctl log, means it won't work for crashkernel setting in anoconda because the installation kernel doesn't have the SME/SEV setting. So customer need manually run 'kdumpctl reset-crashkernel' to reset crashkernel to add the extra 64M after OS installation. 3) We need watch the line of log printing in print_mem_encrypt_feature_info() in kernel just in case people may change it in the future. Signed-off-by: Baoquan He <bhe@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Baoquan He <bhe@redhat.com>
2023-09-21 10:01:02 +00:00
is_sme_or_sev_active && ((_delta += 64))
elif [[ $_arch == "aarch64" ]]; then
local _running_kernel
# Base line for 4K variant kernel. The formula is based on x86 plus extra = 64M
_ck_cmdline="1G-4G:256M,4G-64G:320M,64G-:576M"
if [[ -z "$2" ]]; then
_running_kernel=$(_get_kdump_kernel_version)
else
_running_kernel=$2
fi
# the naming convention of 64k variant suffixes with +64k, e.g. "vmlinuz-5.14.0-312.el9.aarch64+64k"
if echo "$_running_kernel" | grep -q 64k; then
# Without smmu, the diff of MemFree between 4K and 64K measured on a high end aarch64 machine is 82M.
# Picking up 100M to cover this diff. And finally, we have "1G-4G:356M;4G-64G:420M;64G-:676M"
((_delta += 100))
# On a 64K system, the extra 384MB is calculated by: cmdq_num * 16 bytes + evtq_num * 32B + priq_num * 16B
# While on a 4K system, it is negligible
has_aarch64_smmu && ((_delta += 384))
#64k kernel, mlx5 consumes extra 188M memory, and choose 200M
has_mlx5 && ((_delta += 200))
else
#4k kernel, mlx5 consumes extra 124M memory, and choose 150M
has_mlx5 && ((_delta += 150))
fi
elif [[ $_arch == "ppc64le" ]]; then
if [[ $_dump_mode == "fadump" ]]; then
_ck_cmdline="4G-16G:768M,16G-64G:1G,64G-128G:2G,128G-1T:4G,1T-2T:6G,2T-4T:12G,4T-8T:20G,8T-16T:36G,16T-32T:64G,32T-64T:128G,64T-:180G"
else
_ck_cmdline="2G-4G:384M,4G-16G:512M,16G-64G:1G,64G-128G:2G,128G-:4G"
fi
fi
kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active Resolves: https://issues.redhat.com/browse/RHEL-8727 Resolves: https://issues.redhat.com/browse/RHEL-8710 Upstream: Fedora Conflict: The 1st hunk is merged manualy because of conflict caused by context. commit 4841bc6a6dfe4d09640322f6703f60dbfe9f6f19 Author: Baoquan He <bhe@redhat.com> Date: Thu Sep 21 18:01:02 2023 +0800 kdump-lib.sh: add extra 64M to default crashkernel if sme/sev is active It's reported that kdump kernel failed to boot and can't dump vmcore when crashkernel=192M and SME/SEV is active. This is because swiotlb will be enabled and reserves 64M memory by default on system with SME/SEV enabled. Then kdump kernel will be out of memory after taking 64M away for swiotlb init. So here add extra 64M memory to default crashkernel value so that kdump kernel can function well as before. When doing that, search journalctl for the "Memory Encryption Features active: AMD" to check if SME or SEV is active. This line of log is printed out in kernel function as below and the type SME is mutual exclusive with type SEV. ***: arch/x86/mm/mem_encrypt.c:print_mem_encrypt_feature_info() Note: 1) The conditional check is relying on journalctl log because I didn't find available system interface to check if SEV is active. Even though we can check if SME is active via /proc/cpuinfo. For consistency, I take the same check for both SME and SEV by searching journalctl. 2) The conditional check is relying on journalctl log, means it won't work for crashkernel setting in anoconda because the installation kernel doesn't have the SME/SEV setting. So customer need manually run 'kdumpctl reset-crashkernel' to reset crashkernel to add the extra 64M after OS installation. 3) We need watch the line of log printing in print_mem_encrypt_feature_info() in kernel just in case people may change it in the future. Signed-off-by: Baoquan He <bhe@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Baoquan He <bhe@redhat.com>
2023-09-21 10:01:02 +00:00
echo -n "$(_crashkernel_add "$_ck_cmdline" "${_delta}M")"
}
# return recommended size based on current system RAM size
# $1: kernel version, if not set, will defaults to $(uname -r)
kdump_get_arch_recommend_size()
{
local _ck_cmdline _sys_mem
if ! [[ -r "/proc/iomem" ]]; then
echo "Error, can not access /proc/iomem."
return 1
fi
_sys_mem=$(get_system_size)
_ck_cmdline=$(kdump_get_arch_recommend_crashkernel)
_ck_cmdline=${_ck_cmdline//-:/-102400T:}
get_recommend_size "$_sys_mem" "$_ck_cmdline"
}
# Print all underlying crypt devices of a block device
# print nothing if device is not on top of a crypt device
# $1: the block device to be checked in maj:min format
get_luks_crypt_dev()
{
local _type
[[ -b /dev/block/$1 ]] || return 1
_type=$(blkid -u filesystem,crypto -o export -- "/dev/block/$1" | \
sed -n -E "s/^TYPE=(.*)$/\1/p")
[[ $_type == "crypto_LUKS" ]] && echo "$1"
for _x in "/sys/dev/block/$1/slaves/"*; do
[[ -f $_x/dev ]] || continue
[[ $_x/subsystem -ef /sys/class/block ]] || continue
get_luks_crypt_dev "$(< "$_x/dev")"
done
}
# kdump_get_maj_min <device>
# Prints the major and minor of a device node.
# Example:
# $ get_maj_min /dev/sda2
# 8:2
kdump_get_maj_min()
{
local _majmin
_majmin="$(stat -L -c '%t:%T' "$1" 2> /dev/null)"
printf "%s" "$((0x${_majmin%:*})):$((0x${_majmin#*:}))"
}
get_all_kdump_crypt_dev()
{
local _dev
for _dev in $(get_block_dump_target); do
get_luks_crypt_dev "$(kdump_get_maj_min "$_dev")"
done
}