Bugfix release:

- Fix problem where reshape of RAID volume is broken after trying to stop all MD devices. - Enhance raid-check to allow the admin to specify the max number of concurrent arrays to be checked at any given time. - Resolves bz830177, bz820124 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
2012-06-25 19:47:51 +02:00 · 2012-06-25 19:47:51 +02:00 · 44f4b5d516
commit 44f4b5d516
parent d45e40da1b
4 changed files with 85 additions and 14 deletions
--- a/mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
+++ b/mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
@ -0,0 +1,46 @@
 From e1993023991a6fa6539cc604b4b3d6718833250d Mon Sep 17 00:00:00 2001
 From: Lukasz Dorau <lukasz.dorau@intel.com>
 Date: Fri, 25 May 2012 15:06:41 +0200
 Subject: [PATCH] imsm: fix: correct checking volume's degradation
 We do not check the return value of sysfs_get_ll() now. It is wrong.
 If reading of the sysfs "degraded" key does not succeed,
 the "new_degraded" variable will not be initiated
 and accidentally it can have the value of "degraded" variable.
 In that case the change of degradation will not be checked.
 It happens if mdadm is compiled with gcc's "-fstack-protector" option
 when one tries to stop a volume under reshape (e.g. OLCE).
 Reshape seems to be finished then (metadata is in normal/clean state)
 but it is not finished, it is broken and data are corrupted.
 Now we always check the return value of sysfs_get_ll().
 Even if reading of the sysfs "degraded" key does not succeed
 (rv == -1) the change of degradation will be checked.
 Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
 Signed-off-by: NeilBrown <neilb@suse.de>
 ---
 super-intel.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
 diff --git a/super-intel.c b/super-intel.c
 index 6c87e20..07ab9ae 100644
 --- a/super-intel.c
 +++ b/super-intel.c
@@ -10370,8 +10370,10 @@ int check_degradation_change(struct mdinfo *info,
 			     int degraded)
 {
 	unsigned long long new_degraded;
 -	sysfs_get_ll(info, NULL, "degraded", &new_degraded);
 -	if (new_degraded != (unsigned long long)degraded) {
 +	int rv;
 +
 +	rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
 +	if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
 		/* check each device to ensure it is still working */
 		struct mdinfo *sd;
 		new_degraded = 0;
 -- 
 1.7.10.2
--- a/5
+++ b/5
@ -31,9 +31,11 @@
 #	REPAIR_DEVS - a space delimited list of devs that the user
 #		specifically wants to run a repair on.
 #	SKIP_DEVS - a space delimited list of devs that should be skipped
-#       NICE - Change the raid check CPU and IO priority in order to make
+#	NICE - Change the raid check CPU and IO priority in order to make
 #		the system more responsive during lengthy checks.  Valid
 #		values are high, normal, low, idle.
 #	MAXCONCURRENT - Limit the number of devices to be checked at a time.
 #		By default all devices will be checked at the same time.
 #
 # Note: the raid-check script intentionally runs last in the cron.weekly
 # sequence.  This is so we can wait for all the resync operations to complete
@ -55,3 +57,4 @@ NICE=low
 CHECK_DEVS=""
 REPAIR_DEVS=""
 SKIP_DEVS=""
 MAXCONCURRENT=
--- a/mdadm.spec
+++ b/mdadm.spec
@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.2.5
-Release:     2%{?dist}
+Release:     3%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
 Source1:     mdmonitor.init
 Source2:     raid-check
@ -11,6 +11,7 @@ Source5:     mdadm-cron
 Source6:     mdmonitor.service
 Source7:     mdmonitor-takeover.service
 Source8:     mdadm.conf
 Patch1:      mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
 # Fedora customization patches
 Patch97:     mdadm-3.2.3-udev.patch
 Patch98:     mdadm-2.5.2-static.patch
@ -49,6 +50,7 @@ is not used as the system init process.
 %prep
 %setup -q
 %patch1 -p1 -b .reshape
 # Fedora customization patches
 %patch97 -p1 -b .udev
 %patch98 -p1 -b .static
@ -128,6 +130,13 @@ fi
 %{_initrddir}/*
 %changelog
 * Mon Jun 25 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-3
 - Fix problem where reshape of RAID volume is broken after trying to
  stop all MD devices.
 - Enhance raid-check to allow the admin to specify the max number of
  concurrent arrays to be checked at any given time.
 - Resolves bz830177, bz820124
 * Wed Jun 13 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-2
 - Fix uninstall script to remove dangling symlink to
  mdmonitor-takeover.service, if the mdadm package is uninstalled from
--- a/37
+++ b/37
@ -11,6 +11,25 @@
 [ -f /etc/sysconfig/raid-check ] || exit 0
 . /etc/sysconfig/raid-check
 # Wait until no more than arg1 arrays in arg2 list are busy
 #
 # Arguments:
 #   $1 - maximum number of devices that may still be busy when we return
 #   $2 - whitespace-separated list of md device names (as found under
 #        /sys/block)
 #
 # A device counts as busy when its md/sync_action sysfs attribute reads
 # anything other than "idle".  The list is re-scanned every 60 seconds
 # until the busy count drops to $1 or below.
 waitbusy() {
    # Loop exits when busy < threshold, i.e. busy <= $1
    local threshold=$(($1 + 1))
    local dev_list="$2"
    while true
    do
 	# Recount from scratch on every pass
 	local busy=0
 	local dev=""
 	for dev in $dev_list; do
 	    local sync_action=`cat /sys/block/$dev/md/sync_action`
 	    # Any state other than "idle" is treated as busy
 	    if [ "$sync_action" != "idle" ]; then
 		let busy++
 	    fi
 	done
        [ $busy -lt $threshold ] && break
 	# Still too many busy arrays; poll again in a minute
 	sleep 60
    done
 }
 [ "$ENABLED" != "yes" ] && exit 0
 case "$CHECK" in
@ -70,6 +89,10 @@ done
 [ -z "$dev_list" ] && exit 0
 for dev in $dev_list; do
    #Only run $MAXCONCURRENT checks at a time
    if [ -n "$MAXCONCURRENT" ]; then
 	waitbusy $((MAXCONCURRENT - 1)) "$dev_list"
    fi
    echo "${check[$dev]}" > /sys/block/$dev/md/sync_action
    resync_pid=""
@ -86,18 +109,8 @@ for dev in $dev_list; do
 done
 [ -z "$check_list" ] && exit 0
-checking=1
+waitbusy 0 "$check_list"
-while [ $checking -ne 0 ]
+
 do
 	sleep 60
 	checking=0
 	for dev in $check_list; do
 	sync_action=`cat /sys/block/$dev/md/sync_action`
 		if [ "$sync_action" != "idle" ]; then
 			checking=1
 		fi
 	done
 done
 for dev in $check_list; do
 	mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
 	# Due to the fact that raid1/10 writes in the kernel are unbuffered,