kexec-tools/mkdumprd
WANG Chao 7c48f71b6f kdump.sysconfig: default to "nofail" mount
Currently there are two issues with the way systemd mounts filesystems:
1. If sysroot.mount fails, initrd.target won't be reached.
2. If mounting any /etc/fstab entry fails, initrd.target won't be reached.

Our kdump.sh is in dracut-pre-pivot hook which is ordered after
initrd.target. That means if systemd doesn't reach initrd.target,
pre-pivot service will not run.

Based on the above, we can conclude that initrd.target must be reached
in order for kdump.sh to run.

To fix issue 1), we can add rootflags=nofail to the 2nd kernel's cmdline,
so that initrd.target will not require sysroot.mount. initrd.target then
no longer cares about failures in sysroot.mount. That means
initrd.target can always be reached whether or not sysroot.mount fails.
So when initrd.target is reached, kdump.sh can be run.

To fix issue 2), we can append the "nofail" mount option to every entry in
/etc/fstab. This has almost the same effect as it does for sysroot.mount:
initrd.target can be reached whether or not mounting /etc/fstab fails. So
when initrd.target is reached, kdump.sh can be run.

If the mount failures block kdump from working properly (for example,
the dump target isn't mounted), the error handling will be done by
"default" action specified in /etc/kdump.conf. Otherwise kdump will
ignore the mount failures and dump as expected.

Signed-off-by: WANG Chao <chaowang@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
2013-09-27 15:45:24 +08:00

575 lines
15 KiB
Bash

#!/bin/bash --norc
# New mkdumprd
#
# Copyright 2011 Red Hat, Inc.
#
# Written by Cong Wang <amwang@redhat.com>
#

# NOTE(review): presumably provides helpers such as strip_comments and
# is_ssh_dump_target, which are called below but not defined in the
# visible part of this script - confirm.
. /lib/kdump/kdump-lib.sh

# Exported, so it is part of the environment of every child process.
export IN_KDUMP=1

conf_file="/etc/kdump.conf"
SSH_KEY_LOCATION="/root/.ssh/kdump_id_rsa"

# Second space-separated field of the "path" line in the config file;
# falls back to /var/crash when nothing was found.
SAVE_PATH=$(grep ^path $conf_file | cut -d' ' -f2)
SAVE_PATH=${SAVE_PATH:-/var/crash}

extra_modules=""

# Initial dracut command line; add_dracut_arg appends to this array.
dracut_args=(
    "--hostonly"
    "-o" "plymouth dash"
)

OVERRIDE_RESETTABLE=0
# Print all arguments as one message on stderr, then exit with status 1.
# When called inside a command substitution or subshell, only that
# subshell is terminated.
perror_exit() {
    # "$*" keeps the message verbatim: no word splitting, no glob
    # expansion, and a leading "-n"/"-e" is not taken as an echo option.
    printf '%s\n' "$*" >&2
    exit 1
}
# Print all arguments as one message on stderr.
perror() {
    # "$*" keeps the message verbatim: no word splitting, no glob
    # expansion, and a leading "-n"/"-e" is not taken as an echo option.
    printf '%s\n' "$*" >&2
}
# Print a persistent name for the device $1.
#
# $1: device node (anything "udevadm info --name" accepts)
#
# The kernel name of $1 is compared with the kernel name of every entry
# in /dev/mapper, /dev/disk/by-uuid and /dev/disk/by-id (in that order);
# the first matching entry is printed. If nothing matches, a warning goes
# to stderr and $1 itself is printed.
# Calls perror_exit (exit status 1) when udevadm reports no name for $1.
get_persistent_dev() {
    local _link _name _dev

    _dev=$(udevadm info --query=name --name="$1" 2>/dev/null)
    if [ -z "$_dev" ]; then
        perror_exit "Kernel dev name of $1 is not found."
    fi

    for _link in /dev/mapper/* /dev/disk/by-uuid/* /dev/disk/by-id/*; do
        _name=$(udevadm info --query=name --name="$_link" 2>/dev/null)
        if [ "$_name" = "$_dev" ]; then
            # printf with a quoted argument: the path is emitted verbatim.
            printf '%s\n' "$_link"
            return
        fi
    done

    perror "WARNING: Persistent device name of $1 not found. Using $1 as dump target name"
    printf '%s\n' "$1"
}
# Append the given words to the global dracut_args array.
#
# Words are normally appended one by one. A run of words that starts
# with a word beginning with a quote and ends with a word ending with a
# quote is merged into ONE array element, so that
#     add_dracut_arg --omit-drivers '"cfg80211' 'snd"'
# adds two elements. Single quotes are treated like double quotes.
# The merged element is the words joined by single spaces with the
# enclosing quotes removed; as in the original implementation it keeps
# one leading space (" cfg80211 snd").
# A quoted run that is never closed is dropped.
add_dracut_arg() {
    local arg qarg="" is_quoted=0

    while [ $# -gt 0 ]; do
        # Normalise single quotes to double quotes.
        arg="${1//\'/\"}"

        #Handle quoted substring properly for passing it to dracut_args array.
        # The tests use explicit \"* / *\" patterns: comparing against an
        # unquoted ${arg#\"} would treat the argument itself as a glob
        # pattern and misclassify words such as [ab] or "*.
        if [ "$is_quoted" -eq 0 ] && [[ $arg == \"* ]]; then
            is_quoted=1
            arg=${arg#\"}
        fi

        if [ "$is_quoted" -eq 1 ]; then
            qarg="$qarg $arg"
            if [[ $arg == *\" ]]; then
                # Closing quote found: emit the merged run.
                is_quoted=0
                arg=${qarg%\"}
                qarg=""
            else
                # Still inside the quoted run: keep collecting.
                shift
                continue
            fi
        fi

        dracut_args+=("$arg")
        shift
    done
}
# Queue "--add <$1>" on the dracut command line.
add_dracut_module() {
    local _module=$1
    add_dracut_arg --add "$_module"
}
# Queue "--mount <$1>" on the dracut command line; $1 is passed on as a
# single word.
add_dracut_mount() {
    local _mount_spec=$1
    add_dracut_arg --mount "$_mount_spec"
}
# Queue "--sshkey <$1>" on the dracut command line.
add_dracut_sshkey() {
    local _key=$1
    add_dracut_arg --sshkey "$_key"
}
# Generic substring function. If $2 is in $1, return 0.
# $2 is matched literally (quoted inside the pattern), not as a regular
# expression or glob.
strstr() { [[ $1 == *"$2"* ]]; }
# Succeed when the first entry of the sorted list of mount points that
# findmnt reports (from the kernel table, -k) for $1 is "/".
target_is_root() {
    local _first

    # NOTE(review): $1 is kept unquoted as in the original. add_mount
    # hands its argument through quoted, so word splitting here is what
    # drops any stray surrounding whitespace - confirm before quoting.
    _first=$(findmnt -k -n -r -o TARGET $1 | sort | head -n 1)

    if [ "$_first" = "/" ]; then
        return 0
    fi
    return 1
}
# Print the "--mount" specification for dump target $1:
#     <device> <mount point> <fstype> <options>
# caller should ensure $1 is valid and mounted in 1st kernel
#
# Source, mount point, filesystem type and options are read from the
# kernel mount table via findmnt. A leading "ro" in the options becomes
# "rw" and ",nofail" is appended. When the mount source is a block device
# it is replaced by the name from get_persistent_dev; otherwise $1 is
# used as the device. Returns 1 if get_persistent_dev fails.
to_mount() {
    local _dev=$1 _src _where _opts _pdev

    # NOTE(review): $_dev is kept unquoted as in the original; word
    # splitting drops stray whitespace from values that callers pass
    # through quoted - confirm before quoting.
    _src=$(findmnt -k -f -n -r -o SOURCE $_dev)
    _where=$(findmnt -k -f -n -r -o TARGET,FSTYPE $_dev)
    _opts=$(findmnt -k -f -n -r -o OPTIONS $_dev)

    #mount fs target as rw in 2nd kernel;
    #with nofail set, systemd won't block for mount failure
    _opts="${_opts/#ro/rw},nofail"

    if [ ! -b "$_src" ]; then
        _pdev=$_dev
    else
        #for non-nfs _dev converting to use udev persistent name
        _pdev="$(get_persistent_dev $_src)" || return 1
    fi

    echo "$_pdev $_where $_opts"
}
# Print the mount point (findmnt TARGET column, kernel table, first match)
# of $1. Prints an empty line when findmnt reports nothing; always
# returns 0.
to_mount_point() {
    local _target
    _target=$(findmnt -k -f -n -r -o TARGET "$1")
    # Quoted, so the mount point is printed verbatim (no glob expansion).
    printf '%s\n' "$_target"
}
# Succeed when the mount options findmnt reports for $1 (kernel table,
# first match) start with "ro".
is_readonly_mount() {
    local _opts
    _opts=$(findmnt -k -f -n -r -o OPTIONS "$1")
    #fs/proc_namespace.c: show_mountinfo():
    #seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
    [[ "$_opts" =~ ^ro ]]
}
#Function: get_ssh_size
#$1=dump target
# Print (without trailing newline) the "Available" column that
# "df -P $SAVE_PATH", run on the ssh dump target, reports.
# Calls perror_exit (exit status 1) when the ssh command fails.
get_ssh_size() {
    local _out _size
    # An array keeps every option a separate word even if
    # SSH_KEY_LOCATION contains whitespace.
    local -a _opt=(-i "$SSH_KEY_LOCATION" -o BatchMode=yes -o StrictHostKeyChecking=yes)

    _out=$(ssh -q -n "${_opt[@]}" "$1" "df -P $SAVE_PATH") ||
        perror_exit "checking remote ssh server available size failed."

    # Take field 4 of the last line of the captured output instead of
    # counting words across the flattened header + data line.
    _size=$(printf '%s\n' "$_out" | tail -n 1 | awk '{print $4}')
    printf '%s' "$_size"
}
#mkdir if save path does not exist on ssh dump target
#$1=ssh dump target
#caller should ensure write permission on $DUMP_TARGET:$SAVE_PATH
#
# Creates $SAVE_PATH on the remote side, then verifies write permission by
# creating and removing a temporary directory below it.
# Calls perror_exit (exit status 1) if either step fails; returns 0
# otherwise.
mkdir_save_path_ssh()
{
    local _dir _ret
    # NOTE(review): DUMP_TARGET is not set anywhere in the visible part of
    # this script, so the messages fall back to $1 when it is empty.
    local _target="${DUMP_TARGET:-$1}"
    # An array keeps every option a separate word even if
    # SSH_KEY_LOCATION contains whitespace.
    local -a _opt=(-i "$SSH_KEY_LOCATION" -o BatchMode=yes -o StrictHostKeyChecking=yes)

    # -n on every ssh call: this function is invoked from a
    # "while read ... done < file" loop, and without -n ssh would consume
    # the remaining lines of that file from stdin.
    # Only stdout is discarded; remote error text stays on stderr.
    ssh -q -n "${_opt[@]}" "$1" mkdir -p "$SAVE_PATH" > /dev/null
    _ret=$?
    if [ $_ret -ne 0 ]; then
        perror_exit "mkdir failed on $_target:$SAVE_PATH"
    fi

    #check whether user has write permission on $SAVE_PATH/$DUMP_TARGET
    _dir=$(ssh -q -n "${_opt[@]}" "$1" mktemp -dqp "$SAVE_PATH" 2>/dev/null)
    _ret=$?
    if [ $_ret -ne 0 ]; then
        perror_exit "Could not create temporary directory on $_target:$SAVE_PATH. Make sure user has write permission on destination"
    fi

    ssh -q -n "${_opt[@]}" "$1" rmdir "$_dir"
    return 0
}
#mkdir if save path does not exist on dump target filesystem
#$1=dump target
#caller should ensure $1 is mounted
#
# If the target is mounted read-only it is remounted read-write for the
# mkdir and remounted read-only afterwards.
# Returns 0 when the directory exists or was created; calls perror_exit
# (exit status 1) when a remount or the mkdir fails.
mkdir_save_path_fs() {
    local _mnt _dir _ret
    local _remount="no"

    _mnt=$(to_mount_point "$1")
    _dir="${_mnt}/$SAVE_PATH"

    # Nothing to do if the directory is already there.
    [ -d "$_dir" ] && return 0

    if is_readonly_mount "$1"; then
        echo "Mounting $1 as read-write for creating dump directory.."
        mount -o remount,rw "$1" || {
            perror_exit "Mounting $1 as read-write failed."
        }
        _remount="yes"
    fi

    mkdir -p "$_dir"
    _ret=$?

    # Restore the read-only state before reporting a mkdir failure.
    if [ "$_remount" = "yes" ]; then
        echo "Remounting $1 as read-only."
        mount -o remount,ro "$1" || {
            perror_exit "Remounting $1 as read-only failed."
        }
    fi

    if [ $_ret -ne 0 ]; then
        perror_exit "Creating ${_mnt}/$SAVE_PATH failed."
    fi
    return 0
}
#Function: get_fs_size
#$1=dump target
# Print (without trailing newline) the "Available" column of
# "df -P <mount point of $1>/$SAVE_PATH".
get_fs_size() {
    local _mnt _size
    _mnt=$(to_mount_point "$1")
    _size=$(df -P "${_mnt}/$SAVE_PATH" | tail -n 1 | awk '{print $4}')
    printf '%s' "$_size"
}
#Function: get_raw_size
#$1=dump target
# Print (without trailing newline) what "fdisk -s $1" reports.
get_raw_size() {
    local _blocks
    _blocks=$(fdisk -s "$1")
    echo -n $_blocks
}
#Function: check_size
#$1: dump type string ('raw', 'fs', 'ssh')
#$2: dump target
# Compare the space available on the dump target with the MemTotal value
# from /proc/meminfo and print a warning on stdout if it is smaller.
# Any other dump type is ignored. Calls perror_exit (exit status 1) when
# the size helper for the type fails.
check_size() {
    local avail memtotal

    memtotal=$(awk '/MemTotal/{print $2}' /proc/meminfo)

    case "$1" in
        raw|ssh|fs) ;;
        *) return 0 ;;
    esac

    # Dispatches to get_raw_size, get_ssh_size or get_fs_size.
    avail=$("get_${1}_size" "$2") || perror_exit "Check dump target size failed"

    # A missing or non-numeric value cannot be compared; say so instead of
    # letting the numeric test fail with a shell error.
    if ! [[ $avail =~ ^[0-9]+$ && $memtotal =~ ^[0-9]+$ ]]; then
        perror "Warning: Could not determine the available space on $2, skipping the size check."
        return 0
    fi

    if [ "$avail" -lt "$memtotal" ]; then
        echo "Warning: There might not be enough space to save a vmcore."
        echo " The size of $2 should be greater than $memtotal kilo bytes."
    fi
    return 0
}
# $1: core_collector config value
# Sanity-check the core_collector setting:
#  - warns on stdout when the config file has a "raw" line and the
#    collector (first word of $1) is not makedumpfile;
#  - calls perror_exit (exit status 1) when the dump target is ssh or raw,
#    the collector is makedumpfile and $1 does not contain "-F".
# Returns 0 otherwise.
verify_core_collector() {
    local _collector="${1%% *}"

    if grep -q "^raw" "$conf_file" && [ "$_collector" != "makedumpfile" ]; then
        echo "Warning: specifying a non-makedumpfile core collector, you will have to recover the vmcore manually."
    fi

    if is_ssh_dump_target || is_raw_dump_target; then
        if [ "$_collector" = "makedumpfile" ] && ! strstr "$1" "-F"; then
            perror_exit "The specified dump target needs makedumpfile \"-F\" option."
        fi
    fi

    # Explicit success: a passed check must not leak a non-zero status.
    return 0
}
# Add a "--mount" entry for dump target $1, unless $1 is the root
# filesystem (target_is_root).
# Exits with status 1 when to_mount fails.
add_mount() {
    local _mnt

    if ! target_is_root "$1"; then
        # Declared separately above: "local _mnt=$(...)" would replace the
        # exit status of to_mount with that of "local" (always 0).
        _mnt=$(to_mount "$1") || exit 1
        add_dracut_mount "$_mnt"
    fi
}
# get_maj_min <device>
# Prints the major and minor of a device node.
# Symlinks are followed (stat -L). If stat fails, an empty line is
# printed.
# Example:
# $ get_maj_min /dev/sda2
# 8:2
get_maj_min() {
    local _hex _maj _min

    # %t / %T are the major / minor device type in hexadecimal.
    _hex=$(stat -L -c '%t %T' "$1" 2>/dev/null)
    if [ -z "$_hex" ]; then
        echo
        return
    fi

    _maj=${_hex%% *}
    _min=${_hex##* }
    # 16#... converts from hex without resorting to eval.
    echo "$((16#$_maj)):$((16#$_min))"
}
# ugly workaround for the lvm design
# There is no volume group device,
# so, there are no slave devices for volume groups.
# Logical volumes only have the slave devices they really live on,
# but you cannot create the logical volume without the volume group.
# And the volume group might be bigger than the devices the LV needs.
#
# $1 = function to call on every found block device
# $2 = block device in major:minor format
# For each /dev/mapper entry whose major:minor equals $2, look up its
# volume group and run check_block_and_slaves on every physical volume of
# that group. Returns 0 as soon as one of those calls succeeds, else 1.
check_vol_slaves() {
    # The loop variable is local: this function and check_block_and_slaves
    # call each other recursively.
    local _dm _lv _vg _pv

    for _dm in /dev/mapper/*; do
        _lv=$(get_maj_min "$_dm")
        [[ "$_lv" == "$2" ]] || continue

        _vg=$(lvm lvs --noheadings -o vg_name "$_dm" 2>/dev/null)
        # strip space (intentional word splitting)
        _vg=$(echo $_vg)
        [[ -n "$_vg" ]] || continue

        # The pv_name list is split into words on purpose.
        for _pv in $(lvm vgs --noheadings -o pv_name "$_vg" 2>/dev/null); do
            check_block_and_slaves "$1" "$(get_maj_min "$_pv")" && return 0
        done
    done
    return 1
}
# Walk all the slave relationships for a given block device.
# Stop when our helper function returns success
# $1 = function to call on every found block device
# $2 = block device in major:minor format
#
# Order of checks: the device itself, its LVM volume group members
# (check_vol_slaves), the device named in ../dev below its sysfs entry,
# then every entry of its sysfs slaves directory.
# Returns 0 on the first success, 1 otherwise.
check_block_and_slaves() {
    local _slave
    local _sys="/sys/dev/block/$2"

    # Not a block device? So sorry.
    if [[ ! -b /dev/block/$2 ]]; then
        return 1
    fi

    if "$1" $2; then
        return 0
    fi

    if check_vol_slaves "$@"; then
        return 0
    fi

    if [[ -f $_sys/../dev ]]; then
        if check_block_and_slaves $1 $(cat "$_sys/../dev"); then
            return 0
        fi
    fi

    if [[ ! -d $_sys/slaves ]]; then
        return 1
    fi

    for _slave in /sys/dev/block/$2/slaves/*/dev; do
        # An unmatched glob leaves the literal pattern behind; skip it.
        [[ -f $_slave ]] || continue
        if check_block_and_slaves $1 $(cat "$_slave"); then
            return 0
        fi
    done

    return 1
}
# Print the device that $1 refers to.
# Double quotes are removed from $1 first. "UUID=<x>" and "LABEL=<x>" are
# resolved with blkid -U / blkid -L; anything else is printed unchanged.
to_dev_name() {
    local dev="${1//\"/}"

    case "$dev" in
        UUID=*)
            dev=$(blkid -U "${dev#UUID=}")
            ;;
        LABEL=*)
            dev=$(blkid -L "${dev#LABEL=}")
            ;;
    esac

    # Quoted, so the name is printed verbatim (no glob expansion).
    printf '%s\n' "$dev"
}
# Print the block device vmcores are dumped to.
#  - prints nothing when the dump target is ssh or nfs;
#  - otherwise prints the device (via to_dev_name) of the first ext2/3/4,
#    xfs, btrfs, minix or raw line of the config file;
#  - otherwise prints the source device of "/" if it is a block device.
# Always returns 0.
get_block_dump_target()
{
    local _target
    # Same file as the rest of the script; falls back to the default path
    # when conf_file is not set.
    local _conf="${conf_file:-/etc/kdump.conf}"

    if is_ssh_dump_target || is_nfs_dump_target; then
        return 0
    fi

    # "exit" after the first match keeps _target a single word.
    _target=$(awk '/^ext[234]|^xfs|^btrfs|^minix|^raw/ {print $2; exit}' "$_conf" 2>/dev/null)
    if [ -n "$_target" ]; then
        printf '%s\n' "$(to_dev_name "$_target")"
        return 0
    fi

    #get rootfs device name
    _target=$(findmnt -k -f -n -o SOURCE /)
    if [ -b "$_target" ]; then
        printf '%s\n' "$(to_dev_name "$_target")"
    fi
    return 0
}
# Print the root filesystem device (via to_dev_name) when the config file
# sets "default dump_to_rootfs" and the source of "/" is a block device;
# print nothing otherwise. Always returns 0.
get_default_action_target()
{
    local _target _action
    # Same file as the rest of the script; falls back to the default path
    # when conf_file is not set.
    local _conf="${conf_file:-/etc/kdump.conf}"

    _action=$(awk '/^default/ {print $2}' "$_conf" 2>/dev/null)
    if [ "$_action" = "dump_to_rootfs" ]; then
        #get rootfs device name
        _target=$(findmnt -k -f -n -o SOURCE /)
        if [ -b "$_target" ]; then
            printf '%s\n' "$(to_dev_name "$_target")"
        fi
    fi
    return 0
}
# Read the "override_resettable" directive from the config file into the
# global OVERRIDE_RESETTABLE. The global is left untouched when the
# directive is absent. Any value other than 0 or 1 is rejected through
# perror_exit.
get_override_resettable()
{
    local _line

    _line=$(grep "^override_resettable" $conf_file)
    [ -z "$_line" ] && return 0

    # The unquoted echo collapses runs of whitespace before cut picks the
    # second space-separated field.
    OVERRIDE_RESETTABLE=$(echo $_line | cut -d' ' -f2)

    case "$OVERRIDE_RESETTABLE" in
        0|1)
            ;;
        *)
            perror_exit "override_resettable value $OVERRIDE_RESETTABLE is invalid"
            ;;
    esac
}
# $1: function name
# Run check_block_and_slaves with callback $1 on the block dump target
# and, if set, on the root device used by the dump_to_rootfs default
# action.
# Returns 1 if the callback succeeded for the dump target,
#         2 if it succeeded for the default-action target,
#         0 if it succeeded for neither.
for_each_block_target()
{
    local dev majmin

    #check dump target
    dev=$(get_block_dump_target)
    if [ -n "$dev" ]; then
        majmin=$(get_maj_min "$dev")
        check_block_and_slaves "$1" "$majmin" && return 1
    fi

    #check rootfs when default action dump_to_rootfs is set
    dev=$(get_default_action_target)
    if [ -n "$dev" ]; then
        majmin=$(get_maj_min "$dev")
        check_block_and_slaves "$1" "$majmin" && return 2
    fi

    return 0
}
#judge if a specific device with $1 (major:minor) is unresettable
#returns 0 (true) and prints a note on stdout when the device's sysfs
#"resettable" attribute is 0 and OVERRIDE_RESETTABLE is 0;
#returns 1 (false) otherwise, including when the attribute does not exist.
is_unresettable()
{
    local path resettable=1 device

    # Take the sysfs path from udevadm's "P:" line, cut everything after a
    # cciss<N>/ component and everything from /block/ on; the "resettable"
    # attribute is looked up in the directory that remains.
    path=$(udevadm info --query=all --path="/sys/dev/block/$1" \
        | awk '/^P:/ {print $2}' \
        | sed -e 's/\(cciss[0-9]\+\/\).*/\1/g' -e 's/\/block\/.*$//')
    path="/sys/${path}/resettable"

    [ -f "$path" ] || return 1

    resettable=$(cat "$path")
    if [ "$resettable" -eq 0 ] && [ "$OVERRIDE_RESETTABLE" -eq 0 ]; then
        device=$(udevadm info --query=all --path="/sys/dev/block/$1" | awk -F= '/DEVNAME/{print $2}')
        echo "Device $device is unresettable"
        return 0
    fi
    return 1
}
#check if machine is resettable.
#return true if resettable
#
# Loads OVERRIDE_RESETTABLE from the config, then runs is_unresettable
# over the dump target and the default-action target. On failure the
# offending target is named on stderr and 1 is returned.
check_resettable()
{
    local _ret _target

    get_override_resettable

    for_each_block_target is_unresettable
    _ret=$?

    case $_ret in
        0)
            return 0
            ;;
        1)
            _target=$(get_block_dump_target)
            perror "Can not save vmcore to target device $_target . This device can not be initialized in kdump kernel as it is not resettable"
            ;;
        2)
            _target=$(get_default_action_target)
            perror "Rootfs device $_target is not resettable, can not be used as the default target, please specify a default action"
            ;;
    esac

    return 1
}
# Abort when a dump target sits on a device that cannot be reset.
check_resettable || exit 1
# $1: maj:min
# Returns 0 and names the device on stderr when udev reports
# ID_FS_TYPE=crypto_LUKS for the block device; returns 1 otherwise.
is_crypt()
{
    local majmin=$1 dev fstype

    # Extract the value as plain text; udevadm output is never evaluated
    # as shell code.
    fstype=$(udevadm info --query=property --path="/sys/dev/block/$majmin" \
        | sed -n 's/^ID_FS_TYPE=//p')

    if [[ "$fstype" == "crypto_LUKS" ]]; then
        dev=$(udevadm info --query=all --path="/sys/dev/block/$majmin" | awk -F= '/DEVNAME/{print $2}')
        perror "Device $dev is encrypted, can not be used in kdump."
        return 0
    fi
    return 1
}
# Run is_crypt over the dump target and the default-action target.
# Returns 0 when neither matched; otherwise explains the failure on
# stderr and returns 1.
check_crypt()
{
    local _ret _target

    for_each_block_target is_crypt
    _ret=$?

    case $_ret in
        0)
            return 0
            ;;
        1)
            _target=$(get_block_dump_target)
            perror "Can not save vmcore to target device $_target."
            ;;
        2)
            perror "Default action is dump_to_rootfs but can not save vmcore to root device."
            ;;
    esac

    return 1
}
# Abort when a dump target sits on an encrypted device.
check_crypt || exit 1
# firstly get right SSH_KEY_LOCATION
# A key named by the "sshkey" directive replaces the default location,
# but only if that file exists.
keyfile=$(awk '/^sshkey/ {print $2}' $conf_file)
# canonicalize the path
[ -f "$keyfile" ] && SSH_KEY_LOCATION=$(/usr/bin/readlink -m $keyfile)

# On s390x the "znet" dracut module is added.
case "$(uname -m)" in
    s390x)
        add_dracut_module "znet"
        ;;
esac
# Walk the config file one directive per line ("<option> <value...>"),
# validate each dump target and collect the matching dracut arguments.
# Directives not listed below are ignored.
# NOTE(review): read is used without -r, as in the original, so backslash
# sequences in the config file keep being interpreted by read.
while read config_opt config_val; do
    # remove inline comments after the end of a directive.
    config_val=$(strip_comments $config_val)
    # Trim trailing whitespace so the value can be passed on quoted.
    # NOTE(review): strip_comments is defined outside this chunk;
    # presumably it can leave the blank that preceded a comment - confirm.
    config_val=${config_val%"${config_val##*[![:space:]]}"}

    case "$config_opt" in
    extra_modules)
        extra_modules="$extra_modules $config_val"
        ;;
    ext[234]|xfs|btrfs|minix|nfs)
        if ! findmnt "$config_val" >/dev/null; then
            perror_exit "Dump target $config_val is probably not mounted."
        fi

        if [ "$config_opt" = "nfs" ]; then
            add_dracut_module "nfs"
        fi

        add_mount "$config_val"
        mkdir_save_path_fs "$config_val"
        check_size fs "$config_val"
        ;;
    raw)
        #checking raw disk readable (one block is read, nothing is written)
        dd if="$config_val" count=1 of=/dev/null > /dev/null 2>&1 || {
            perror_exit "Bad raw disk $config_val"
        }
        _praw=$(get_persistent_dev "$config_val") || exit 1
        add_dracut_arg "--device" "$_praw"
        check_size raw "$config_val"
        ;;
    ssh)
        if strstr "$config_val" "@"; then
            check_size ssh "$config_val"
            # stdin of this loop is the config file: keep the ssh commands
            # run by the helper from consuming the remaining directives.
            mkdir_save_path_ssh "$config_val" < /dev/null
            add_dracut_module "ssh-client"
            add_dracut_sshkey "$SSH_KEY_LOCATION"
        else
            perror_exit "Bad ssh dump target $config_val"
        fi
        ;;
    core_collector)
        verify_core_collector "$config_val"
        ;;
    dracut_args)
        # Unquoted on purpose: add_dracut_arg expects the value split into
        # words and re-joins quoted runs itself.
        add_dracut_arg $config_val
        ;;
    *)
        # Comment lines and every other directive need no handling here.
        # (The former "[ -n $(...) ]" test was always true, so this branch
        # always just moved on to the next line.)
        :
        ;;
    esac
done < "$conf_file"
# Hand the collected extra_modules list to dracut, if there is one.
[ -z "$extra_modules" ] || add_dracut_arg "--add-drivers" "$extra_modules"

# Run dracut with the collected arguments followed by this script's own
# command-line arguments; its status becomes the script's exit status.
dracut "${dracut_args[@]}" "$@"
_rc=$?

# sync runs before exiting, whatever dracut returned.
sync

exit $_rc