Bugfix release:

- Fix problem where reshape of RAID volume is broken after trying to stop all MD devices. - Enhance raid-check to allow the admin to specify the max number of concurrent arrays to be checked at any given time. - Resolves bz830177, bz820124 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
2012-06-25 19:47:51 +02:00 · 2012-06-25 19:47:51 +02:00 · 44f4b5d516
commit 44f4b5d516
parent d45e40da1b
4 changed files with 85 additions and 14 deletions
--- a/mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
+++ b/mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
@ -0,0 +1,46 @@
+From e1993023991a6fa6539cc604b4b3d6718833250d Mon Sep 17 00:00:00 2001
+From: Lukasz Dorau <lukasz.dorau@intel.com>
+Date: Fri, 25 May 2012 15:06:41 +0200
+Subject: [PATCH] imsm: fix: correct checking volume's degradation
+
+We do not check the return value of sysfs_get_ll() now. It is wrong.
+If reading of the sysfs "degraded" key does not succeed,
+the "new_degraded" variable will not be initiated
+and accidentally it can have the value of "degraded" variable.
+In that case the change of degradation will not be checked.
+
+It happens if mdadm is compiled with gcc's "-fstack-protector" option
+when one tries to stop a volume under reshape (e.g. OLCE).
+Reshape seems to be finished then (metadata is in normal/clean state)
+but it is not finished, it is broken and data are corrupted.
+
+Now we always check the return value of sysfs_get_ll().
+Even if reading of the sysfs "degraded" key does not succeed
+(rv == -1) the change of degradation will be checked.
+
+Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
+Signed-off-by: NeilBrown <neilb@suse.de>
+---
+ super-intel.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/super-intel.c b/super-intel.c
+index 6c87e20..07ab9ae 100644
+--- a/super-intel.c
+++ b/super-intel.c
+@@ -10370,8 +10370,10 @@ int check_degradation_change(struct mdinfo *info,
+ 			     int degraded)
+ {
+ 	unsigned long long new_degraded;
+-	sysfs_get_ll(info, NULL, "degraded", &new_degraded);
+-	if (new_degraded != (unsigned long long)degraded) {
++	int rv;
++
++	rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
++	if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
+ 		/* check each device to ensure it is still working */
+ 		struct mdinfo *sd;
+ 		new_degraded = 0;
+-- 
+1.7.10.2
+
--- a/5
+++ b/5
@ -31,9 +31,11 @@
 #	REPAIR_DEVS - a space delimited list of devs that the user
 #		specifically wants to run a repair on.
 #	SKIP_DEVS - a space delimited list of devs that should be skipped
-#       NICE - Change the raid check CPU and IO priority in order to make
+#	NICE - Change the raid check CPU and IO priority in order to make
 #		the system more responsive during lengthy checks.  Valid
 #		values are high, normal, low, idle.
+#	MAXCONCURRENT - Limit the number of devices to be checked at a time.
+#		By default all devices will be checked at the same time.
 #
 # Note: the raid-check script intentionaly runs last in the cron.weekly
 # sequence.  This is so we can wait for all the resync operations to complete
@ -55,3 +57,4 @@ NICE=low
 CHECK_DEVS=""
 REPAIR_DEVS=""
 SKIP_DEVS=""
+MAXCONCURRENT=
--- a/mdadm.spec
+++ b/mdadm.spec
@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.2.5
-Release:     2%{?dist}
+Release:     3%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
 Source1:     mdmonitor.init
 Source2:     raid-check
@ -11,6 +11,7 @@ Source5:     mdadm-cron
 Source6:     mdmonitor.service
 Source7:     mdmonitor-takeover.service
 Source8:     mdadm.conf
+Patch1:      mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
 # Fedora customization patches
 Patch97:     mdadm-3.2.3-udev.patch
 Patch98:     mdadm-2.5.2-static.patch
@ -49,6 +50,7 @@ is not used as the system init process.
 %prep
 %setup -q

+%patch1 -p1 -b .reshape
 # Fedora customization patches
 %patch97 -p1 -b .udev
 %patch98 -p1 -b .static
@ -128,6 +130,13 @@ fi
 %{_initrddir}/*

 %changelog
+* Mon Jun 25 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-3
+- Fix problem where reshape of RAID volume is broken after trying to
+  stop all MD devices.
+- Enhance raid-check to allow the admin to specify the max number of
+  concurrent arrays to be checked at any given time.
+- Resolves bz830177, bz820124
+
 * Wed Jun 13 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-2
 - Fix uninstall script to remove dangling symlink to
  mdmonitor-takeover.service, if the mdadm package is uninstalled from
--- a/37
+++ b/37
@ -11,6 +11,25 @@
 [ -f /etc/sysconfig/raid-check ] || exit 0
 . /etc/sysconfig/raid-check

+# Wait until no more than arg1 arrays in arg2 list are busy
+waitbusy() {
+    local threshold=$(($1 + 1))
+    local dev_list="$2"
+    while true
+    do
+	local busy=0
+	local dev=""
+	for dev in $dev_list; do
+	    local sync_action=`cat /sys/block/$dev/md/sync_action`
+	    if [ "$sync_action" != "idle" ]; then
+		let busy++
+	    fi
+	done
+        [ $busy -lt $threshold ] && break
+	sleep 60
+    done
+}
+
 [ "$ENABLED" != "yes" ] && exit 0

 case "$CHECK" in
@ -70,6 +89,10 @@ done
 [ -z "$dev_list" ] && exit 0

 for dev in $dev_list; do
+    #Only run $MAXCONCURRENT checks at a time
+    if [ -n "$MAXCONCURRENT" ]; then
+	waitbusy $((MAXCONCURRENT - 1)) "$dev_list"
+    fi
    echo "${check[$dev]}" > /sys/block/$dev/md/sync_action

    resync_pid=""
@ -86,18 +109,8 @@ for dev in $dev_list; do
 done
 [ -z "$check_list" ] && exit 0

-checking=1
-while [ $checking -ne 0 ]
-do
-	sleep 60
-	checking=0
-	for dev in $check_list; do
-	sync_action=`cat /sys/block/$dev/md/sync_action`
-		if [ "$sync_action" != "idle" ]; then
-			checking=1
-		fi
-	done
-done
+waitbusy 0 "$check_list"
+
 for dev in $check_list; do
 	mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
 	# Due to the fact that raid1/10 writes in the kernel are unbuffered,