Bugfix release:
- Fix problem where reshape of RAID volume is broken after trying to stop all MD devices. - Enhance raid-check to allow the admin to specify the max number of concurrent arrays to be checked at any given time. - Resolves bz830177, bz820124 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
This commit is contained in:
parent
d45e40da1b
commit
44f4b5d516
@ -0,0 +1,46 @@
|
||||
From e1993023991a6fa6539cc604b4b3d6718833250d Mon Sep 17 00:00:00 2001
|
||||
From: Lukasz Dorau <lukasz.dorau@intel.com>
|
||||
Date: Fri, 25 May 2012 15:06:41 +0200
|
||||
Subject: [PATCH] imsm: fix: correct checking volume's degradation
|
||||
|
||||
We do not check the return value of sysfs_get_ll() now. It is wrong.
|
||||
If reading of the sysfs "degraded" key does not succeed,
|
||||
the "new_degraded" variable will not be initialized
|
||||
and accidentally it can have the value of "degraded" variable.
|
||||
In that case the change of degradation will not be checked.
|
||||
|
||||
It happens if mdadm is compiled with gcc's "-fstack-protector" option
|
||||
when one tries to stop a volume under reshape (e.g. OLCE).
|
||||
Reshape seems to be finished then (metadata is in normal/clean state)
|
||||
but it is not finished, it is broken and data are corrupted.
|
||||
|
||||
Now we always check the return value of sysfs_get_ll().
|
||||
Even if reading of the sysfs "degraded" key does not succeed
|
||||
(rv == -1) the change of degradation will be checked.
|
||||
|
||||
Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
|
||||
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||
---
|
||||
super-intel.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/super-intel.c b/super-intel.c
|
||||
index 6c87e20..07ab9ae 100644
|
||||
--- a/super-intel.c
|
||||
+++ b/super-intel.c
|
||||
@@ -10370,8 +10370,10 @@ int check_degradation_change(struct mdinfo *info,
|
||||
int degraded)
|
||||
{
|
||||
unsigned long long new_degraded;
|
||||
- sysfs_get_ll(info, NULL, "degraded", &new_degraded);
|
||||
- if (new_degraded != (unsigned long long)degraded) {
|
||||
+ int rv;
|
||||
+
|
||||
+ rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
|
||||
+ if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
|
||||
/* check each device to ensure it is still working */
|
||||
struct mdinfo *sd;
|
||||
new_degraded = 0;
|
||||
--
|
||||
1.7.10.2
|
||||
|
@ -31,9 +31,11 @@
|
||||
# REPAIR_DEVS - a space delimited list of devs that the user
|
||||
# specifically wants to run a repair on.
|
||||
# SKIP_DEVS - a space delimited list of devs that should be skipped
|
||||
# NICE - Change the raid check CPU and IO priority in order to make
|
||||
# NICE - Change the raid check CPU and IO priority in order to make
|
||||
# the system more responsive during lengthy checks. Valid
|
||||
# values are high, normal, low, idle.
|
||||
# MAXCONCURRENT - Limit the number of devices to be checked at a time.
|
||||
# By default all devices will be checked at the same time.
|
||||
#
|
||||
# Note: the raid-check script intentionally runs last in the cron.weekly
|
||||
# sequence. This is so we can wait for all the resync operations to complete
|
||||
@ -55,3 +57,4 @@ NICE=low
|
||||
CHECK_DEVS=""
|
||||
REPAIR_DEVS=""
|
||||
SKIP_DEVS=""
|
||||
MAXCONCURRENT=
|
||||
|
11
mdadm.spec
11
mdadm.spec
@ -1,7 +1,7 @@
|
||||
Summary: The mdadm program controls Linux md devices (software RAID arrays)
|
||||
Name: mdadm
|
||||
Version: 3.2.5
|
||||
Release: 2%{?dist}
|
||||
Release: 3%{?dist}
|
||||
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
|
||||
Source1: mdmonitor.init
|
||||
Source2: raid-check
|
||||
@ -11,6 +11,7 @@ Source5: mdadm-cron
|
||||
Source6: mdmonitor.service
|
||||
Source7: mdmonitor-takeover.service
|
||||
Source8: mdadm.conf
|
||||
Patch1: mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
|
||||
# Fedora customization patches
|
||||
Patch97: mdadm-3.2.3-udev.patch
|
||||
Patch98: mdadm-2.5.2-static.patch
|
||||
@ -49,6 +50,7 @@ is not used as the system init process.
|
||||
%prep
|
||||
%setup -q
|
||||
|
||||
%patch1 -p1 -b .reshape
|
||||
# Fedora customization patches
|
||||
%patch97 -p1 -b .udev
|
||||
%patch98 -p1 -b .static
|
||||
@ -128,6 +130,13 @@ fi
|
||||
%{_initrddir}/*
|
||||
|
||||
%changelog
|
||||
* Mon Jun 25 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-3
|
||||
- Fix problem where reshape of RAID volume is broken after trying to
|
||||
stop all MD devices.
|
||||
- Enhance raid-check to allow the admin to specify the max number of
|
||||
concurrent arrays to be checked at any given time.
|
||||
- Resolves bz830177, bz820124
|
||||
|
||||
* Wed Jun 13 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-2
|
||||
- Fix uninstall script to remove dangling symlink to
|
||||
mdmonitor-takeover.service, if the mdadm package is uninstalled from
|
||||
|
37
raid-check
37
raid-check
@ -11,6 +11,25 @@
|
||||
[ -f /etc/sysconfig/raid-check ] || exit 0
|
||||
. /etc/sysconfig/raid-check
|
||||
|
||||
# Wait until no more than arg1 arrays in arg2 list are busy
|
||||
waitbusy() {
|
||||
local threshold=$(($1 + 1))
|
||||
local dev_list="$2"
|
||||
while true
|
||||
do
|
||||
local busy=0
|
||||
local dev=""
|
||||
for dev in $dev_list; do
|
||||
local sync_action=`cat /sys/block/$dev/md/sync_action`
|
||||
if [ "$sync_action" != "idle" ]; then
|
||||
let busy++
|
||||
fi
|
||||
done
|
||||
[ $busy -lt $threshold ] && break
|
||||
sleep 60
|
||||
done
|
||||
}
|
||||
|
||||
[ "$ENABLED" != "yes" ] && exit 0
|
||||
|
||||
case "$CHECK" in
|
||||
@ -70,6 +89,10 @@ done
|
||||
[ -z "$dev_list" ] && exit 0
|
||||
|
||||
for dev in $dev_list; do
|
||||
#Only run $MAXCONCURRENT checks at a time
|
||||
if [ -n "$MAXCONCURRENT" ]; then
|
||||
waitbusy $((MAXCONCURRENT - 1)) "$dev_list"
|
||||
fi
|
||||
echo "${check[$dev]}" > /sys/block/$dev/md/sync_action
|
||||
|
||||
resync_pid=""
|
||||
@ -86,18 +109,8 @@ for dev in $dev_list; do
|
||||
done
|
||||
[ -z "$check_list" ] && exit 0
|
||||
|
||||
checking=1
|
||||
while [ $checking -ne 0 ]
|
||||
do
|
||||
sleep 60
|
||||
checking=0
|
||||
for dev in $check_list; do
|
||||
sync_action=`cat /sys/block/$dev/md/sync_action`
|
||||
if [ "$sync_action" != "idle" ]; then
|
||||
checking=1
|
||||
fi
|
||||
done
|
||||
done
|
||||
waitbusy 0 "$check_list"
|
||||
|
||||
for dev in $check_list; do
|
||||
mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
|
||||
# Due to the fact that raid1/10 writes in the kernel are unbuffered,
|
||||
|
Loading…
Reference in New Issue
Block a user