72c1204567
Currently the mdcheck service cannot be run successfully. mdcheck needs to be installed in the right place (/usr/share/mdadm/mdcheck), and its dependency on mdadm_env.sh needs to be removed, which is done in patch 76c224c6c. There is also a historical problem: KillMode=none was needed before, so the upstream patch 52c67fcdd was removed. That problem has now been fixed, so the backport is easier and we no longer need to remove the upstream patch here. This change also adds a RHEL-only feature for transient failures. Resolves: rhbz#2159923, rhbz#2150865, rhbz#2124071, rhbz#2203859 Signed-off-by: Xiao Ni <xni@redhat.com>
167 lines
3.8 KiB
Bash
167 lines
3.8 KiB
Bash
#!/bin/bash
# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    Author: Neil Brown
#    Email: <neilb@suse.com>

# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation can allow the 'check' to continue.
#
# Options are:
#   --continue    Don't start new checks, only continue old ones.
#   --duration    This is passed to "date --date=$duration" to find out
#                 when to finish
#
# To support '--continue', arrays are identified by UUID and the 'sync_completed'
# value is stored in /var/lib/mdcheck/MD_UUID_$UUID

# Convert a /dev/md name into its /sys/.../md equivalent.
#
# Arguments: $1 - path of a device node (e.g. /dev/md0)
# Outputs:   the resolved /sys/dev/block/<major>:<minor> path on stdout
# Returns:   the exit status of readlink
sysname() {
	local maj min

	# For a device node "ls -l" prints "<major>, <minor>" in fields 5 and 6.
	# "set --" stops a listing that starts with '-' (a regular file's mode
	# string) from being parsed as shell options.  The command substitution
	# is left unquoted on purpose so the listing is split into fields.
	# shellcheck disable=SC2012,SC2046
	set -- $(ls -lLd -- "$1")
	maj=${5%,}
	min=$6
	readlink -f "/sys/dev/block/$maj:$min"
}

# Parse the command line.
#
# Globals:   cont    (written) "yes" when --continue / -c was given, else empty
#            endtime (written) epoch seconds at which to stop, or empty when
#                              no --duration / -d was given
#            dur     (written) the raw argument of --duration / -d
# Arguments: the script's command-line arguments
# Exits:     with getopt's status on a usage error, with 0 after printing
#            help, and with 1 when date(1) rejects the duration
parse_args() {
	local args rv

	args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
	rv=$?
	if [ $rv -ne 0 ]; then exit $rv; fi

	# getopt emits shell-quoted words; quoting $args keeps that quoting
	# (embedded whitespace, glob characters) intact until eval re-parses it.
	eval set -- "$args"

	cont=
	endtime=
	while [ " $1" != " --" ]
	do
		case $1 in
		# getopt does not translate short options to long ones, so both
		# spellings have to be matched here.
		-h | --help )
			echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
			echo >&2 ' time-offset must be understood by "date --date"'
			exit 0
			;;
		-c | --continue ) cont=yes ;;
		-d | --duration ) shift; dur=$1
			# An unusable duration must not silently turn into
			# "no time budget at all"; date has already printed why.
			endtime=$(date --date "$dur" "+%s") || exit 1
			;;
		esac
		shift
	done
}

parse_args "$@"

# We need a temp file occasionally...
# It lives next to the checkpoint files and is removed when the script exits.
# NOTE(review): on INT/QUIT/TERM the handler only removes the file and does
# not exit, so the script appears to carry on after those signals - confirm
# that this is intended before changing it.
tmp="/var/lib/mdcheck/.md-check-$$"
trap 'rm -f "$tmp"' EXIT INT QUIT TERM

# firstly, clean out really old state files
# (checkpoint files not modified for more than 180 days)
mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;

# Now look at each md device.
#
# For every array that exposes md/sync_action and is currently idle, decide
# where the check starts (0 for a fresh run, the saved checkpoint when
# --continue was given), remember the array in the numbered variables
# MD_<n>_fl / MD_<n>_sys / MD_<n>_dev for the later loops, and start it.
cnt=0
for dev in /dev/md?*
do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$dev" ] || continue
	sys=$(sysname "$dev")
	if [ ! -f "$sys/md/sync_action" ]
	then # cannot check this array
		continue
	fi
	if [ "$(cat "$sys/md/sync_action")" != 'idle' ]
	then # This array is busy
		continue
	fi

	# Keep only the MD_UUID=... line of mdadm's export output and load it
	# as a shell variable; skip the device when there is no such line.
	mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
	# shellcheck disable=SC1090
	source "$tmp"
	fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
	if [ -z "$cont" ]
	then
		start=0
		logger -p daemon.info mdcheck start checking "$dev"
	elif [ -z "$MD_UUID" ] || [ ! -f "$fl" ]
	then
		# Nothing to continue here
		continue
	else
		start=$(cat "$fl")
		logger -p daemon.info mdcheck continue checking "$dev" from "$start"
	fi

	cnt=$((cnt + 1))
	printf -v "MD_${cnt}_fl" '%s' "$fl"
	printf -v "MD_${cnt}_sys" '%s' "$sys"
	printf -v "MD_${cnt}_dev" '%s' "$dev"
	# Record the start position first, then hand it to the kernel and
	# kick off the check.
	echo "$start" > "$fl"
	echo "$start" > "$sys/md/sync_min"
	echo check > "$sys/md/sync_action"
done

# Without an end time there is nothing to wait for or to stop: leave
# whatever was started above running and finish.
if [ -z "$endtime" ]
then
	exit 0
fi

# Poll the registered arrays until the end time is reached.
#
# Once per pass, for each array recorded in MD_<n>_*: if its sync_action is
# no longer 'check', log that, forget the array and remove its checkpoint
# file; otherwise store the first field of sync_completed as the checkpoint.
# Sleeps 120 seconds between passes.
#
# Globals: endtime, cnt, MD_<n>_fl, MD_<n>_sys, MD_<n>_dev (read);
#          MD_<n>_fl (cleared for arrays that are done)
# Exits:   the script with status 0 as soon as no registered check is running
monitor_checks() {
	local i any fl sys dev ref a

	while (( $(date +%s) < endtime ))
	do
		any=
		for ((i = 1; i <= cnt; i++))
		do
			ref="MD_${i}_fl";  fl=${!ref}
			ref="MD_${i}_sys"; sys=${!ref}
			ref="MD_${i}_dev"; dev=${!ref}

			# An empty slot means this array was already dealt with.
			if [ -z "$fl" ]; then continue; fi

			if [ "$(cat "$sys/md/sync_action")" != 'check' ]
			then
				logger -p daemon.info mdcheck finished checking "$dev"
				printf -v "MD_${i}_fl" '%s' ''
				rm -f "$fl"
				continue
			fi
			# Only the first word of sync_completed is kept.
			read -r a _ < "$sys/md/sync_completed"
			echo "$a" > "$fl"
			any=yes
		done
		if [ -z "$any" ]; then exit 0; fi
		sleep 120
	done
}

monitor_checks

# We've waited, and there are still checks running.
# Time to stop them.
#
# For each array still recorded in MD_<n>_*: if it is no longer checking,
# forget it and remove its checkpoint file; otherwise write 'idle' to its
# sync_action, save the value then read from sync_min as the checkpoint,
# and log where the check was paused.
#
# Globals: cnt, MD_<n>_fl, MD_<n>_sys, MD_<n>_dev (read);
#          MD_<n>_fl (cleared for arrays that are done)
pause_checks() {
	local i fl sys dev ref

	for ((i = 1; i <= cnt; i++))
	do
		ref="MD_${i}_fl";  fl=${!ref}
		ref="MD_${i}_sys"; sys=${!ref}
		ref="MD_${i}_dev"; dev=${!ref}

		# An empty slot means this array was already dealt with.
		if [ -z "$fl" ]; then continue; fi

		if [ "$(cat "$sys/md/sync_action")" != 'check' ]
		then
			printf -v "MD_${i}_fl" '%s' ''
			rm -f "$fl"
			continue
		fi
		echo idle > "$sys/md/sync_action"
		cat "$sys/md/sync_min" > "$fl"
		logger -p daemon.info pause checking "$dev" at "$(cat "$fl")"
	done
}

pause_checks