- Don't run the raid-check script if the kernel doesn't support md devices (bz557053)

- Don't report any mismatch_cnt issues on raid1 devices as there are legitimate reasons why the count may not be 0 and we are getting enough false positives that it renders the check useless (bz554217, bz547128)
2010-02-19 23:54:16 +00:00 · 2010-02-19 23:54:16 +00:00 · 6aac98e8e7
commit 6aac98e8e7
parent 25ef040df7
2 changed files with 26 additions and 3 deletions
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.1.1
-Release:     0.gcd9a8b5.2%{?dist}
+Release:     0.gcd9a8b5.3%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}-gcd9a8b5.tar.bz2
 Source1:     mdmonitor.init
 Source2:     raid-check
@@ -79,12 +79,19 @@ fi
 %attr(0700,root,root) %dir /var/run/mdadm

 %changelog
+* Fri Feb 19 2010 Doug Ledford <dledford@redhat.com> - 3.1.1-0.gcd9a8b5.3
+- Don't run the raid-check script if the kernel doesn't support
+  md devices (bz557053)
+- Don't report any mismatch_cnt issues on raid1 devices as there are
+  legitimate reasons why the count may not be 0 and we are getting enough
+  false positives that it renders the check useless (bz554217, bz547128)
+
 * Thu Feb 18 2010 Doug Ledford <dledford@redhat.com> - 3.1.1-0.gcd9a8b5.2
 - Fix s390/ppc64 UUID byte swap issue

 * Wed Feb 17 2010 Doug Ledford <dledford@redhat.com> - 3.1.1-0.gcd9a8b5.1
 - Update to head of upstream git repo, which contains a significant number
-  of bug fixes we need
+  of bug fixes we need (bz543746)

 * Fri Jan 15 2010 Doug Ledford <dledford@redhat.com> - 3.0.3-3
 - Fix crash when AUTO keyword is in mdadm.conf (bz552342)
--- a/18
+++ b/18
@@ -4,6 +4,10 @@
 # Please use that file to enable/disable this script or to set the
 # type of check you wish performed.

+# We might be on a kernel with no raid support at all, exit if so
+[ -f /proc/mdstat ] || exit 0
+
+# and exit if we haven't been set up properly
 [ -f /etc/sysconfig/raid-check ] || exit 0
 . /etc/sysconfig/raid-check

@@ -62,7 +66,19 @@ do
 done
 for dev in $check_list; do
 	mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
-	if [ "$mismatch_cnt" -ne 0 ]; then
+	# Due to the fact that raid1 writes in the kernel are unbuffered,
+	# a raid1 array can have non-0 mismatch counts even when the
+	# array is healthy.  These non-0 counts will only exist in
+	# transient data areas where they don't pose a problem.  However,
+	# since we can't tell the difference between a non-0 count that
+	# is just in transient data or a non-0 count that signifies a
+	# real problem, simply don't check the mismatch_cnt on raid1
+	# devices as it's providing far too many false positives.  But by
+	# leaving the raid1 device in the check list and performing the
+	# check, we still catch and correct any bad sectors there might
+	# be in the device.
+	raid_lvl=`cat /sys/block/$dev/md/level`
+	if [ "$mismatch_cnt" -ne 0 -a "$raid_lvl" != "raid1" ]; then
 		echo "WARNING: mismatch_cnt is not 0 on /dev/$dev"
 	fi
 done