kexec-tools/dracut-kdump.sh

630 lines
14 KiB
Bash
Raw Normal View History

#!/bin/sh
#
# The main kdump routine in capture kernel, bash may not be the
# default shell. Any code added must be POSIX compliant.
. /lib/dracut-lib.sh
. /lib/kdump-logger.sh
. /lib/kdump-lib-initramfs.sh
#initiate the kdump logger
if ! dlog_init; then
echo "failed to initiate the kdump logger."
exit 1
fi
KDUMP_PATH="/var/crash"
KDUMP_LOG_FILE="/run/initramfs/kexec-dmesg.log"
CORE_COLLECTOR=""
DEFAULT_CORE_COLLECTOR="makedumpfile -l --message-level 7 -d 31"
DMESG_COLLECTOR="/sbin/vmcore-dmesg"
FAILURE_ACTION="systemctl reboot -f"
DATEDIR=$(date +%Y-%m-%d-%T)
HOST_IP='127.0.0.1'
DUMP_INSTRUCTION=""
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"
DD_BLKSIZE=512
FINAL_ACTION="systemctl reboot -f"
KDUMP_PRE=""
KDUMP_POST=""
NEWROOT="/sysroot"
OPALCORE="/sys/firmware/opal/mpipl/core"
KDUMP_CONF_PARSED="/tmp/kdump.conf.$$"
# POSIX doesn't have pipefail, only apply when using bash
# shellcheck disable=SC3040
[ -n "$BASH" ] && set -o pipefail
DUMP_RETVAL=0
kdump_read_conf > $KDUMP_CONF_PARSED
get_kdump_confs()
{
while read -r config_opt config_val; do
# remove inline comments after the end of a directive.
case "$config_opt" in
path)
KDUMP_PATH="$config_val"
;;
core_collector)
[ -n "$config_val" ] && CORE_COLLECTOR="$config_val"
;;
sshkey)
if [ -f "$config_val" ]; then
SSH_KEY_LOCATION=$config_val
fi
;;
kdump_pre)
KDUMP_PRE="$config_val"
;;
kdump_post)
KDUMP_POST="$config_val"
;;
fence_kdump_args)
FENCE_KDUMP_ARGS="$config_val"
;;
fence_kdump_nodes)
FENCE_KDUMP_NODES="$config_val"
;;
failure_action | default)
case $config_val in
shell)
FAILURE_ACTION="kdump_emergency_shell"
;;
reboot)
FAILURE_ACTION="systemctl reboot -f && exit"
;;
halt)
FAILURE_ACTION="halt && exit"
;;
poweroff)
FAILURE_ACTION="systemctl poweroff -f && exit"
;;
dump_to_rootfs)
FAILURE_ACTION="dump_to_rootfs"
;;
esac
;;
final_action)
case $config_val in
reboot)
FINAL_ACTION="systemctl reboot -f"
;;
halt)
FINAL_ACTION="halt"
;;
poweroff)
FINAL_ACTION="systemctl poweroff -f"
;;
esac
;;
esac
done < "$KDUMP_CONF_PARSED"
if [ -z "$CORE_COLLECTOR" ]; then
CORE_COLLECTOR="$DEFAULT_CORE_COLLECTOR"
if is_ssh_dump_target || is_raw_dump_target; then
CORE_COLLECTOR="$CORE_COLLECTOR -F"
fi
fi
}
# store the kexec kernel log to a file.
save_log()
{
dmesg -T > $KDUMP_LOG_FILE
if command -v journalctl > /dev/null; then
journalctl -ab >> $KDUMP_LOG_FILE
fi
chmod 600 $KDUMP_LOG_FILE
}
# $1: dump path, must be a mount point
dump_fs()
{
ddebug "dump_fs _mp=$1"
if ! is_mounted "$1"; then
dinfo "dump path '$1' is not mounted, trying to mount..."
if ! mount --target "$1"; then
derror "failed to dump to '$1', it's not a mount point!"
return 1
fi
fi
# Remove -F in makedumpfile case. We don't want a flat format dump here.
case $CORE_COLLECTOR in
*makedumpfile*)
CORE_COLLECTOR=$(echo "$CORE_COLLECTOR" | sed -e "s/-F//g")
;;
esac
_dump_fs_path=$(echo "$1/$KDUMP_PATH/$HOST_IP-$DATEDIR/" | tr -s /)
dinfo "saving to $_dump_fs_path"
# Only remount to read-write mode if the dump target is mounted read-only.
_dump_mnt_op=$(get_mount_info OPTIONS target "$1" -f)
case $_dump_mnt_op in
ro*)
dinfo "Remounting the dump target in rw mode."
mount -o remount,rw "$1" || return 1
;;
esac
mkdir -p "$_dump_fs_path" || return 1
save_vmcore_dmesg_fs ${DMESG_COLLECTOR} "$_dump_fs_path"
save_opalcore_fs "$_dump_fs_path"
dinfo "saving vmcore"
$CORE_COLLECTOR /proc/vmcore "$_dump_fs_path/vmcore-incomplete"
_dump_exitcode=$?
if [ $_dump_exitcode -eq 0 ]; then
sync -f "$_dump_fs_path/vmcore-incomplete"
_sync_exitcode=$?
if [ $_sync_exitcode -eq 0 ]; then
mv "$_dump_fs_path/vmcore-incomplete" "$_dump_fs_path/vmcore"
dinfo "saving vmcore complete"
else
derror "sync vmcore failed, exitcode:$_sync_exitcode"
return 1
fi
else
derror "saving vmcore failed, exitcode:$_dump_exitcode"
fi
dinfo "saving the $KDUMP_LOG_FILE to $_dump_fs_path/"
save_log
mv "$KDUMP_LOG_FILE" "$_dump_fs_path/"
if [ $_dump_exitcode -ne 0 ]; then
return 1
fi
# improper kernel cmdline can cause the failure of echo, we can ignore this kind of failure
return 0
}
# $1: dmesg collector
# $2: dump path
save_vmcore_dmesg_fs()
{
dinfo "saving vmcore-dmesg.txt to $2"
if $1 /proc/vmcore > "$2/vmcore-dmesg-incomplete.txt"; then
mv "$2/vmcore-dmesg-incomplete.txt" "$2/vmcore-dmesg.txt"
chmod 600 "$2/vmcore-dmesg.txt"
# Make sure file is on disk. There have been instances where later
# saving vmcore failed and system rebooted without sync and there
# was no vmcore-dmesg.txt available.
sync
dinfo "saving vmcore-dmesg.txt complete"
else
if [ -f "$2/vmcore-dmesg-incomplete.txt" ]; then
chmod 600 "$2/vmcore-dmesg-incomplete.txt"
fi
derror "saving vmcore-dmesg.txt failed"
fi
}
# $1: dump path
save_opalcore_fs()
{
if [ ! -f $OPALCORE ]; then
# Check if we are on an old kernel that uses a different path
if [ -f /sys/firmware/opal/core ]; then
OPALCORE="/sys/firmware/opal/core"
else
return 0
fi
fi
dinfo "saving opalcore:$OPALCORE to $1/opalcore"
if ! cp $OPALCORE "$1/opalcore"; then
derror "saving opalcore failed"
return 1
fi
sync
dinfo "saving opalcore complete"
return 0
}
dump_to_rootfs()
{
if [ "$(systemctl status dracut-initqueue | sed -n "s/^\s*Active: \(\S*\)\s.*$/\1/p")" = "inactive" ]; then
dinfo "Trying to bring up initqueue for rootfs mount"
systemctl start dracut-initqueue
fi
dinfo "Clean up dead systemd services"
systemctl cancel
dinfo "Waiting for rootfs mount, will timeout after 90 seconds"
systemctl start --no-block sysroot.mount
_loop=0
while [ $_loop -lt 90 ] && ! is_mounted /sysroot; do
sleep 1
_loop=$((_loop + 1))
done
if ! is_mounted /sysroot; then
derror "Failed to mount rootfs"
return
fi
ddebug "NEWROOT=$NEWROOT"
dump_fs $NEWROOT
}
kdump_emergency_shell()
{
ddebug "Switching to kdump emergency shell..."
[ -f /etc/profile ] && . /etc/profile
export PS1='kdump:${PWD}# '
. /lib/dracut-lib.sh
if [ -f /dracut-state.sh ]; then
. /dracut-state.sh 2> /dev/null
fi
source_conf /etc/conf.d
type plymouth > /dev/null 2>&1 && plymouth quit
source_hook "emergency"
while read -r _tty rest; do
(
echo
echo
echo 'Entering kdump emergency mode.'
echo 'Type "journalctl" to view system logs.'
echo 'Type "rdsosreport" to generate a sosreport, you can then'
echo 'save it elsewhere and attach it to a bug report.'
echo
echo
) > "/dev/$_tty"
done < /proc/consoles
sh -i -l
/bin/rm -f -- /.console_lock
}
do_failure_action()
{
dinfo "Executing failure action $FAILURE_ACTION"
eval $FAILURE_ACTION
}
do_final_action()
{
dinfo "Executing final action $FINAL_ACTION"
eval $FINAL_ACTION
}
do_dump()
{
Introduce vmcore creation notification to kdump Upstream: fedora Resolves: RHEL-32060 Conflict: Yes, there are several conflicts. 1) Upstream have moved dracut-kdump.sh into kdump-utils/dracut/99kdumpbase/kdump.sh, so the targeting files are changed. 2) There are several patchsets([1] [2]) which not backported to rhel9, so some formating conflicts encountered. But there is no functional change been made for the patch backporting. [1]: https://github.com/rhkdump/kdump-utils/pull/18/commits [2]: https://github.com/rhkdump/kdump-utils/pull/33/commits commit 88525ebf5e43cc86aea66dc75ec83db58233883b Author: Tao Liu <ltao@redhat.com> Date: Thu Sep 5 15:49:07 2024 +1200 Introduce vmcore creation notification to kdump Motivation ========== People may forget to recheck to ensure kdump works, which as a result, a possibility of no vmcores generated after a real system crash. It is unexpected for kdump. It is highly recommended people to recheck kdump after any system modification, such as: a. after kernel patching or whole yum update, as it might break something on which kdump is dependent, maybe due to introduction of any new bug etc. b. after any change at hardware level, maybe storage, networking, firmware upgrading etc. c. after implementing any new application, like which involves 3rd party modules etc. Though these exceed the range of kdump, however a simple vmcore creation status notification is good to have for now. Design ====== Kdump currently will check any relating files/fs/drivers modified before determine if initrd should rebuild when (re)start. A rebuild is an indicator of such modification, and kdump need to be rechecked. This will clear the vmcore creation status specified in $VMCORE_CREATION_STATUS. Vmcore creation check will happen at "kdumpctl (re)start/status", and will report the creation success/fail status to users. A "success" status indicates previously there has been a vmcore successfully generated based on the current env, so it is more likely a vmcore will be generated later when real crash happens; A "fail" status indicates previously there was no vmcore generated, or has been a vmcore creation failed based on current env. User should check the 2nd kernel log or the kexec-dmesg.log for the failing reason. $VMCORE_CREATION_STATUS is used for recording the vmcore creation status of the current env. The format will be like: success 1718682002 Which means, there has been a vmcore generated successfully at this timestamp for the current env. Usage ===== [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: No vmcore creation test performed! [root@localhost ~]# kdumpctl test [root@localhost ~]# kdumpctl status kdump: Kdump is operational kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 The notification for kdumpctl (re)start/status can be disabled by setting VMCORE_CREATION_NOTIFICATION in /etc/sysconfig/kdump Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2024-10-08 01:48:04 +00:00
if [ -d /vmcorestatus ]; then
_vmcore_creation_status="/vmcorestatus/$VMCORE_CREATION_STATUS"
else
_vmcore_creation_status="/sysroot/$VMCORE_CREATION_STATUS"
fi
set_vmcore_creation_status 'clear' "$_vmcore_creation_status"
eval "$DUMP_INSTRUCTION"
_ret=$?
if [ $_ret -ne 0 ]; then
Introduce vmcore creation notification to kdump Upstream: fedora Resolves: RHEL-32060 Conflict: Yes, there are several conflicts. 1) Upstream have moved dracut-kdump.sh into kdump-utils/dracut/99kdumpbase/kdump.sh, so the targeting files are changed. 2) There are several patchsets([1] [2]) which not backported to rhel9, so some formating conflicts encountered. But there is no functional change been made for the patch backporting. [1]: https://github.com/rhkdump/kdump-utils/pull/18/commits [2]: https://github.com/rhkdump/kdump-utils/pull/33/commits commit 88525ebf5e43cc86aea66dc75ec83db58233883b Author: Tao Liu <ltao@redhat.com> Date: Thu Sep 5 15:49:07 2024 +1200 Introduce vmcore creation notification to kdump Motivation ========== People may forget to recheck to ensure kdump works, which as a result, a possibility of no vmcores generated after a real system crash. It is unexpected for kdump. It is highly recommended people to recheck kdump after any system modification, such as: a. after kernel patching or whole yum update, as it might break something on which kdump is dependent, maybe due to introduction of any new bug etc. b. after any change at hardware level, maybe storage, networking, firmware upgrading etc. c. after implementing any new application, like which involves 3rd party modules etc. Though these exceed the range of kdump, however a simple vmcore creation status notification is good to have for now. Design ====== Kdump currently will check any relating files/fs/drivers modified before determine if initrd should rebuild when (re)start. A rebuild is an indicator of such modification, and kdump need to be rechecked. This will clear the vmcore creation status specified in $VMCORE_CREATION_STATUS. Vmcore creation check will happen at "kdumpctl (re)start/status", and will report the creation success/fail status to users. A "success" status indicates previously there has been a vmcore successfully generated based on the current env, so it is more likely a vmcore will be generated later when real crash happens; A "fail" status indicates previously there was no vmcore generated, or has been a vmcore creation failed based on current env. User should check the 2nd kernel log or the kexec-dmesg.log for the failing reason. $VMCORE_CREATION_STATUS is used for recording the vmcore creation status of the current env. The format will be like: success 1718682002 Which means, there has been a vmcore generated successfully at this timestamp for the current env. Usage ===== [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: No vmcore creation test performed! [root@localhost ~]# kdumpctl test [root@localhost ~]# kdumpctl status kdump: Kdump is operational kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 The notification for kdumpctl (re)start/status can be disabled by setting VMCORE_CREATION_NOTIFICATION in /etc/sysconfig/kdump Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2024-10-08 01:48:04 +00:00
set_vmcore_creation_status 'fail' "$_vmcore_creation_status"
derror "saving vmcore failed"
Introduce vmcore creation notification to kdump Upstream: fedora Resolves: RHEL-32060 Conflict: Yes, there are several conflicts. 1) Upstream have moved dracut-kdump.sh into kdump-utils/dracut/99kdumpbase/kdump.sh, so the targeting files are changed. 2) There are several patchsets([1] [2]) which not backported to rhel9, so some formating conflicts encountered. But there is no functional change been made for the patch backporting. [1]: https://github.com/rhkdump/kdump-utils/pull/18/commits [2]: https://github.com/rhkdump/kdump-utils/pull/33/commits commit 88525ebf5e43cc86aea66dc75ec83db58233883b Author: Tao Liu <ltao@redhat.com> Date: Thu Sep 5 15:49:07 2024 +1200 Introduce vmcore creation notification to kdump Motivation ========== People may forget to recheck to ensure kdump works, which as a result, a possibility of no vmcores generated after a real system crash. It is unexpected for kdump. It is highly recommended people to recheck kdump after any system modification, such as: a. after kernel patching or whole yum update, as it might break something on which kdump is dependent, maybe due to introduction of any new bug etc. b. after any change at hardware level, maybe storage, networking, firmware upgrading etc. c. after implementing any new application, like which involves 3rd party modules etc. Though these exceed the range of kdump, however a simple vmcore creation status notification is good to have for now. Design ====== Kdump currently will check any relating files/fs/drivers modified before determine if initrd should rebuild when (re)start. A rebuild is an indicator of such modification, and kdump need to be rechecked. This will clear the vmcore creation status specified in $VMCORE_CREATION_STATUS. Vmcore creation check will happen at "kdumpctl (re)start/status", and will report the creation success/fail status to users. A "success" status indicates previously there has been a vmcore successfully generated based on the current env, so it is more likely a vmcore will be generated later when real crash happens; A "fail" status indicates previously there was no vmcore generated, or has been a vmcore creation failed based on current env. User should check the 2nd kernel log or the kexec-dmesg.log for the failing reason. $VMCORE_CREATION_STATUS is used for recording the vmcore creation status of the current env. The format will be like: success 1718682002 Which means, there has been a vmcore generated successfully at this timestamp for the current env. Usage ===== [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: No vmcore creation test performed! [root@localhost ~]# kdumpctl test [root@localhost ~]# kdumpctl status kdump: Kdump is operational kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 [root@localhost ~]# kdumpctl restart kdump: kexec: unloaded kdump kernel kdump: Stopping kdump: [OK] kdump: kexec: loaded kdump kernel kdump: Starting kdump: [OK] kdump: Notice: Last successful vmcore creation on Tue Jun 18 16:39:10 CST 2024 The notification for kdumpctl (re)start/status can be disabled by setting VMCORE_CREATION_NOTIFICATION in /etc/sysconfig/kdump Signed-off-by: Tao Liu <ltao@redhat.com> Signed-off-by: Tao Liu <ltao@redhat.com>
2024-10-08 01:48:04 +00:00
else
set_vmcore_creation_status 'success' "$_vmcore_creation_status"
fi
return $_ret
}
do_kdump_pre()
{
if [ -n "$KDUMP_PRE" ]; then
"$KDUMP_PRE"
_ret=$?
if [ $_ret -ne 0 ]; then
derror "$KDUMP_PRE exited with $_ret status"
return $_ret
fi
fi
# if any script fails, it just raises warning and continues
if [ -d /etc/kdump/pre.d ]; then
for file in /etc/kdump/pre.d/*; do
"$file"
_ret=$?
if [ $_ret -ne 0 ]; then
derror "$file exited with $_ret status"
fi
done
fi
return 0
}
do_kdump_post()
{
if [ -d /etc/kdump/post.d ]; then
for file in /etc/kdump/post.d/*; do
"$file" "$1"
_ret=$?
if [ $_ret -ne 0 ]; then
derror "$file exited with $_ret status"
fi
done
fi
if [ -n "$KDUMP_POST" ]; then
"$KDUMP_POST" "$1"
_ret=$?
if [ $_ret -ne 0 ]; then
derror "$KDUMP_POST exited with $_ret status"
fi
fi
}
# $1: block target, eg. /dev/sda
dump_raw()
{
[ -b "$1" ] || return 1
dinfo "saving to raw disk $1"
if ! echo "$CORE_COLLECTOR" | grep -q makedumpfile; then
_src_size=$(stat --format %s /proc/vmcore)
_src_size_mb=$((_src_size / 1048576))
/kdumpscripts/monitor_dd_progress $_src_size_mb &
fi
dinfo "saving vmcore"
$CORE_COLLECTOR /proc/vmcore | dd of="$1" bs=$DD_BLKSIZE >> /tmp/dd_progress_file 2>&1 || return 1
sync
dinfo "saving vmcore complete"
return 0
}
# $1: ssh key file
# $2: ssh address in <user>@<host> format
dump_ssh()
{
_ret=0
_ssh_opt="-i $1 -o BatchMode=yes -o StrictHostKeyChecking=yes"
_ssh_dir="$KDUMP_PATH/$HOST_IP-$DATEDIR"
if is_ipv6_address "$2"; then
_scp_address=${2%@*}@"[${2#*@}]"
else
_scp_address=$2
fi
dinfo "saving to $2:$_ssh_dir"
cat /var/lib/random-seed > /dev/urandom
ssh -q $_ssh_opt "$2" mkdir -p "$_ssh_dir" || return 1
save_vmcore_dmesg_ssh "$DMESG_COLLECTOR" "$_ssh_dir" "$_ssh_opt" "$2"
dinfo "saving vmcore"
save_opalcore_ssh "$_ssh_dir" "$_ssh_opt" "$2" "$_scp_address"
if [ "${CORE_COLLECTOR%%[[:blank:]]*}" = "scp" ]; then
scp -q $_ssh_opt /proc/vmcore "$_scp_address:$_ssh_dir/vmcore-incomplete"
_ret=$?
_vmcore="vmcore"
else
$CORE_COLLECTOR /proc/vmcore | ssh $_ssh_opt "$2" "umask 0077 && dd bs=512 of='$_ssh_dir/vmcore-incomplete'"
_ret=$?
_vmcore="vmcore.flat"
fi
if [ $_ret -eq 0 ]; then
ssh $_ssh_opt "$2" "mv '$_ssh_dir/vmcore-incomplete' '$_ssh_dir/$_vmcore'"
_ret=$?
if [ $_ret -ne 0 ]; then
derror "moving vmcore failed, exitcode:$_ret"
else
dinfo "saving vmcore complete"
fi
else
derror "saving vmcore failed, exitcode:$_ret"
fi
dinfo "saving the $KDUMP_LOG_FILE to $2:$_ssh_dir/"
save_log
if ! scp -q $_ssh_opt $KDUMP_LOG_FILE "$_scp_address:$_ssh_dir/"; then
derror "saving log file failed, _exitcode:$_ret"
fi
return $_ret
}
# $1: dump path
# $2: ssh opts
# $3: ssh address in <user>@<host> format
# $4: scp address, similar with ssh address but IPv6 addresses are quoted
save_opalcore_ssh()
{
if [ ! -f $OPALCORE ]; then
# Check if we are on an old kernel that uses a different path
if [ -f /sys/firmware/opal/core ]; then
OPALCORE="/sys/firmware/opal/core"
else
return 0
fi
fi
dinfo "saving opalcore:$OPALCORE to $3:$1"
if ! scp $2 $OPALCORE "$4:$1/opalcore-incomplete"; then
derror "saving opalcore failed"
return 1
fi
ssh $2 "$3" mv "$1/opalcore-incomplete" "$1/opalcore"
dinfo "saving opalcore complete"
return 0
}
# $1: dmesg collector
# $2: dump path
# $3: ssh opts
# $4: ssh address in <user>@<host> format
save_vmcore_dmesg_ssh()
{
dinfo "saving vmcore-dmesg.txt to $4:$2"
if $1 /proc/vmcore | ssh $3 "$4" "umask 0077 && dd of='$2/vmcore-dmesg-incomplete.txt'"; then
ssh -q $3 "$4" mv "$2/vmcore-dmesg-incomplete.txt" "$2/vmcore-dmesg.txt"
dinfo "saving vmcore-dmesg.txt complete"
else
derror "saving vmcore-dmesg.txt failed"
fi
}
wait_online_network()
{
# In some cases, network may still not be ready because nm-online is called
# with "-s" which means to wait for NetworkManager startup to complete, rather
# than waiting for network connectivity specifically. Wait 10mins more for the
# network to be truely ready in these cases.
_loop=0
while [ $_loop -lt 600 ]; do
sleep 1
_loop=$((_loop + 1))
if _route=$(kdump_get_ip_route "$1" 2> /dev/null); then
printf "%s" "$_route"
return
else
dwarn "Waiting for network to be ready (${_loop}s / 10min)"
fi
done
derror "Oops. The network still isn't ready after waiting 10mins."
exit 1
}
get_host_ip()
{
Address the cases where a NIC has a different name in kdump kernel Resolves: bz2076416 Upstream: Fedora Conflict: None commit 568623e69a32266a3b00225b9de6a93435c44474 Author: Coiby Xu <coxu@redhat.com> Date: Thu Sep 23 14:25:01 2021 +0800 Address the cases where a NIC has a different name in kdump kernel A NIC may get a different name in the kdump kernel from 1st kernel in cases like, - kernel assigned network interface names are not persistent e.g. [1] - there is an udev rule to rename the NIC in the 1st kernel but the kdump initrd may not have that rule e.g. [2] If NM tries to match a NIC with a connection profile based on NIC name i.e. connection.interface-name, it will fail the above bases. A simple solution is to ask NM to match a connection profile by MAC address. Note we don't need to do this for user-created NICs like vlan, bridge and bond. An remaining issue is passing the name of a NIC via the kdumpnic dracut command line parameter which requires passing ifname=<interface>:<MAC> to have fixed NIC name. But we can simply drop this requirement. kdumpnic is needed because kdump needs to get the IP by NIC name and use the IP to created a dumping folder named "{IP}-{DATE}". We can simply pass the IP to the kdump kernel directly via a new dracut command line parameter kdumpip instead. In addition to the benefit of simplifying the code, there are other three benefits brought by this approach, - make use of whatever network to transfer the vmcore. Because as long as we have the network to we don't care which NIC is active. - if obtained IP in the kdump kernel is different from the one in the 1st kernel. "{IP}-{DATE}" would better tell where the dumped vmcore comes from. - without passing ifname=<interface>:<MAC> to kdump initrd, the issue of there are two interfaces with the same MAC address for Azure Hyper-V NIC SR-IOV [3] is resolved automatically. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1121778 [2] https://bugzilla.redhat.com/show_bug.cgi?id=810107 [3] https://bugzilla.redhat.com/show_bug.cgi?id=1962421 Signed-off-by: Coiby Xu <coxu@redhat.com> Reviewed-by: Thomas Haller <thaller@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Coiby Xu <coxu@redhat.com>
2022-11-23 01:42:18 +00:00
if ! is_nfs_dump_target && ! is_ssh_dump_target; then
return 0
fi
Address the cases where a NIC has a different name in kdump kernel Resolves: bz2076416 Upstream: Fedora Conflict: None commit 568623e69a32266a3b00225b9de6a93435c44474 Author: Coiby Xu <coxu@redhat.com> Date: Thu Sep 23 14:25:01 2021 +0800 Address the cases where a NIC has a different name in kdump kernel A NIC may get a different name in the kdump kernel from 1st kernel in cases like, - kernel assigned network interface names are not persistent e.g. [1] - there is an udev rule to rename the NIC in the 1st kernel but the kdump initrd may not have that rule e.g. [2] If NM tries to match a NIC with a connection profile based on NIC name i.e. connection.interface-name, it will fail the above bases. A simple solution is to ask NM to match a connection profile by MAC address. Note we don't need to do this for user-created NICs like vlan, bridge and bond. An remaining issue is passing the name of a NIC via the kdumpnic dracut command line parameter which requires passing ifname=<interface>:<MAC> to have fixed NIC name. But we can simply drop this requirement. kdumpnic is needed because kdump needs to get the IP by NIC name and use the IP to created a dumping folder named "{IP}-{DATE}". We can simply pass the IP to the kdump kernel directly via a new dracut command line parameter kdumpip instead. In addition to the benefit of simplifying the code, there are other three benefits brought by this approach, - make use of whatever network to transfer the vmcore. Because as long as we have the network to we don't care which NIC is active. - if obtained IP in the kdump kernel is different from the one in the 1st kernel. "{IP}-{DATE}" would better tell where the dumped vmcore comes from. - without passing ifname=<interface>:<MAC> to kdump initrd, the issue of there are two interfaces with the same MAC address for Azure Hyper-V NIC SR-IOV [3] is resolved automatically. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1121778 [2] https://bugzilla.redhat.com/show_bug.cgi?id=810107 [3] https://bugzilla.redhat.com/show_bug.cgi?id=1962421 Signed-off-by: Coiby Xu <coxu@redhat.com> Reviewed-by: Thomas Haller <thaller@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Coiby Xu <coxu@redhat.com>
2022-11-23 01:42:18 +00:00
_kdump_remote_ip=$(getarg kdump_remote_ip=)
if [ -z "$_kdump_remote_ip" ]; then
derror "failed to get remote IP address!"
return 1
fi
if ! _route=$(wait_online_network "$_kdump_remote_ip"); then
return 1
fi
Address the cases where a NIC has a different name in kdump kernel Resolves: bz2076416 Upstream: Fedora Conflict: None commit 568623e69a32266a3b00225b9de6a93435c44474 Author: Coiby Xu <coxu@redhat.com> Date: Thu Sep 23 14:25:01 2021 +0800 Address the cases where a NIC has a different name in kdump kernel A NIC may get a different name in the kdump kernel from 1st kernel in cases like, - kernel assigned network interface names are not persistent e.g. [1] - there is an udev rule to rename the NIC in the 1st kernel but the kdump initrd may not have that rule e.g. [2] If NM tries to match a NIC with a connection profile based on NIC name i.e. connection.interface-name, it will fail the above bases. A simple solution is to ask NM to match a connection profile by MAC address. Note we don't need to do this for user-created NICs like vlan, bridge and bond. An remaining issue is passing the name of a NIC via the kdumpnic dracut command line parameter which requires passing ifname=<interface>:<MAC> to have fixed NIC name. But we can simply drop this requirement. kdumpnic is needed because kdump needs to get the IP by NIC name and use the IP to created a dumping folder named "{IP}-{DATE}". We can simply pass the IP to the kdump kernel directly via a new dracut command line parameter kdumpip instead. In addition to the benefit of simplifying the code, there are other three benefits brought by this approach, - make use of whatever network to transfer the vmcore. Because as long as we have the network to we don't care which NIC is active. - if obtained IP in the kdump kernel is different from the one in the 1st kernel. "{IP}-{DATE}" would better tell where the dumped vmcore comes from. - without passing ifname=<interface>:<MAC> to kdump initrd, the issue of there are two interfaces with the same MAC address for Azure Hyper-V NIC SR-IOV [3] is resolved automatically. [1] https://bugzilla.redhat.com/show_bug.cgi?id=1121778 [2] https://bugzilla.redhat.com/show_bug.cgi?id=810107 [3] https://bugzilla.redhat.com/show_bug.cgi?id=1962421 Signed-off-by: Coiby Xu <coxu@redhat.com> Reviewed-by: Thomas Haller <thaller@redhat.com> Reviewed-by: Philipp Rudo <prudo@redhat.com> Signed-off-by: Coiby Xu <coxu@redhat.com>
2022-11-23 01:42:18 +00:00
_netdev=$(kdump_get_ip_route_field "$_route" "dev")
if ! _kdumpip=$(ip addr show dev "$_netdev" | grep '[ ]*inet'); then
derror "Failed to get IP of $_netdev"
return 1
fi
_kdumpip=$(echo "$_kdumpip" | head -n 1 | awk '{print $2}')
_kdumpip="${_kdumpip%%/*}"
HOST_IP=$_kdumpip
}
read_kdump_confs()
{
if [ ! -f "$KDUMP_CONFIG_FILE" ]; then
derror "$KDUMP_CONFIG_FILE not found"
return
fi
get_kdump_confs
# rescan for add code for dump target
while read -r config_opt config_val; do
# remove inline comments after the end of a directive.
case "$config_opt" in
dracut_args)
config_val=$(get_dracut_args_target "$config_val")
if [ -n "$config_val" ]; then
config_val=$(get_mntpoint_from_target "$config_val")
DUMP_INSTRUCTION="dump_fs $config_val"
fi
;;
ext[234] | xfs | btrfs | minix | nfs | virtiofs)
config_val=$(get_mntpoint_from_target "$config_val")
DUMP_INSTRUCTION="dump_fs $config_val"
;;
raw)
DUMP_INSTRUCTION="dump_raw $config_val"
;;
ssh)
DUMP_INSTRUCTION="dump_ssh $SSH_KEY_LOCATION $config_val"
;;
esac
done < "$KDUMP_CONF_PARSED"
}
fence_kdump_notify()
{
if [ -n "$FENCE_KDUMP_NODES" ]; then
# shellcheck disable=SC2086
$FENCE_KDUMP_SEND $FENCE_KDUMP_ARGS $FENCE_KDUMP_NODES &
fi
}
if [ "$1" = "--error-handler" ]; then
get_kdump_confs
do_failure_action
do_final_action
exit $?
fi
# continue here only if we have to save dump.
if [ -f /etc/fadump.initramfs ] && [ ! -f /proc/device-tree/rtas/ibm,kernel-dump ] && [ ! -f /proc/device-tree/ibm,opal/dump/mpipl-boot ]; then
exit 0
fi
read_kdump_confs
fence_kdump_notify
if ! get_host_ip; then
derror "get_host_ip exited with non-zero status!"
exit 1
fi
if [ -z "$DUMP_INSTRUCTION" ]; then
DUMP_INSTRUCTION="dump_fs $NEWROOT"
fi
if ! do_kdump_pre; then
derror "kdump_pre script exited with non-zero status!"
do_final_action
# During systemd service to reboot the machine, stop this shell script running
exit 1
fi
make_trace_mem "kdump saving vmcore" '1:shortmem' '2+:mem' '3+:slab'
do_dump
DUMP_RETVAL=$?
if ! do_kdump_post $DUMP_RETVAL; then
derror "kdump_post script exited with non-zero status!"
fi
if [ $DUMP_RETVAL -ne 0 ]; then
exit 1
fi
do_final_action