Bugfix release:
- Fix problem where reshape of RAID volume is broken after trying to stop all MD devices. - Enhance raid-check to allow the admin to specify the max number of concurrent arrays to be checked at any given time. - Resolves bz830177, bz820124 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
This commit is contained in:
parent
d45e40da1b
commit
44f4b5d516
@ -0,0 +1,46 @@
|
|||||||
|
From e1993023991a6fa6539cc604b4b3d6718833250d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lukasz Dorau <lukasz.dorau@intel.com>
|
||||||
|
Date: Fri, 25 May 2012 15:06:41 +0200
|
||||||
|
Subject: [PATCH] imsm: fix: correct checking volume's degradation
|
||||||
|
|
||||||
|
We do not check the return value of sysfs_get_ll() now. It is wrong.
|
||||||
|
If reading of the sysfs "degraded" key does not succeed,
|
||||||
|
the "new_degraded" variable will not be initialized
|
||||||
|
and accidentally it can have the value of "degraded" variable.
|
||||||
|
In that case the change of degradation will not be checked.
|
||||||
|
|
||||||
|
It happens if mdadm is compiled with gcc's "-fstack-protector" option
|
||||||
|
when one tries to stop a volume under reshape (e.g. OLCE).
|
||||||
|
Reshape seems to be finished then (metadata is in normal/clean state)
|
||||||
|
but it is not finished, it is broken and data are corrupted.
|
||||||
|
|
||||||
|
Now we always check the return value of sysfs_get_ll().
|
||||||
|
Even if reading of the sysfs "degraded" key does not succeed
|
||||||
|
(rv == -1) the change of degradation will be checked.
|
||||||
|
|
||||||
|
Signed-off-by: Lukasz Dorau <lukasz.dorau@intel.com>
|
||||||
|
Signed-off-by: NeilBrown <neilb@suse.de>
|
||||||
|
---
|
||||||
|
super-intel.c | 6 ++++--
|
||||||
|
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/super-intel.c b/super-intel.c
|
||||||
|
index 6c87e20..07ab9ae 100644
|
||||||
|
--- a/super-intel.c
|
||||||
|
+++ b/super-intel.c
|
||||||
|
@@ -10370,8 +10370,10 @@ int check_degradation_change(struct mdinfo *info,
|
||||||
|
int degraded)
|
||||||
|
{
|
||||||
|
unsigned long long new_degraded;
|
||||||
|
- sysfs_get_ll(info, NULL, "degraded", &new_degraded);
|
||||||
|
- if (new_degraded != (unsigned long long)degraded) {
|
||||||
|
+ int rv;
|
||||||
|
+
|
||||||
|
+ rv = sysfs_get_ll(info, NULL, "degraded", &new_degraded);
|
||||||
|
+ if ((rv == -1) || (new_degraded != (unsigned long long)degraded)) {
|
||||||
|
/* check each device to ensure it is still working */
|
||||||
|
struct mdinfo *sd;
|
||||||
|
new_degraded = 0;
|
||||||
|
--
|
||||||
|
1.7.10.2
|
||||||
|
|
@ -31,9 +31,11 @@
|
|||||||
# REPAIR_DEVS - a space delimited list of devs that the user
|
# REPAIR_DEVS - a space delimited list of devs that the user
|
||||||
# specifically wants to run a repair on.
|
# specifically wants to run a repair on.
|
||||||
# SKIP_DEVS - a space delimited list of devs that should be skipped
|
# SKIP_DEVS - a space delimited list of devs that should be skipped
|
||||||
# NICE - Change the raid check CPU and IO priority in order to make
|
# NICE - Change the raid check CPU and IO priority in order to make
|
||||||
# the system more responsive during lengthy checks. Valid
|
# the system more responsive during lengthy checks. Valid
|
||||||
# values are high, normal, low, idle.
|
# values are high, normal, low, idle.
|
||||||
|
# MAXCONCURRENT - Limit the number of devices to be checked at a time.
|
||||||
|
# By default all devices will be checked at the same time.
|
||||||
#
|
#
|
||||||
# Note: the raid-check script intentionally runs last in the cron.weekly
|
# Note: the raid-check script intentionally runs last in the cron.weekly
|
||||||
# sequence. This is so we can wait for all the resync operations to complete
|
# sequence. This is so we can wait for all the resync operations to complete
|
||||||
@ -55,3 +57,4 @@ NICE=low
|
|||||||
CHECK_DEVS=""
|
CHECK_DEVS=""
|
||||||
REPAIR_DEVS=""
|
REPAIR_DEVS=""
|
||||||
SKIP_DEVS=""
|
SKIP_DEVS=""
|
||||||
|
MAXCONCURRENT=
|
||||||
|
11
mdadm.spec
11
mdadm.spec
@ -1,7 +1,7 @@
|
|||||||
Summary: The mdadm program controls Linux md devices (software RAID arrays)
|
Summary: The mdadm program controls Linux md devices (software RAID arrays)
|
||||||
Name: mdadm
|
Name: mdadm
|
||||||
Version: 3.2.5
|
Version: 3.2.5
|
||||||
Release: 2%{?dist}
|
Release: 3%{?dist}
|
||||||
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
|
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
|
||||||
Source1: mdmonitor.init
|
Source1: mdmonitor.init
|
||||||
Source2: raid-check
|
Source2: raid-check
|
||||||
@ -11,6 +11,7 @@ Source5: mdadm-cron
|
|||||||
Source6: mdmonitor.service
|
Source6: mdmonitor.service
|
||||||
Source7: mdmonitor-takeover.service
|
Source7: mdmonitor-takeover.service
|
||||||
Source8: mdadm.conf
|
Source8: mdadm.conf
|
||||||
|
Patch1: mdadm-3.2.5-imsm-fix-correct-checking-volume-s-degradation.patch
|
||||||
# Fedora customization patches
|
# Fedora customization patches
|
||||||
Patch97: mdadm-3.2.3-udev.patch
|
Patch97: mdadm-3.2.3-udev.patch
|
||||||
Patch98: mdadm-2.5.2-static.patch
|
Patch98: mdadm-2.5.2-static.patch
|
||||||
@ -49,6 +50,7 @@ is not used as the system init process.
|
|||||||
%prep
|
%prep
|
||||||
%setup -q
|
%setup -q
|
||||||
|
|
||||||
|
%patch1 -p1 -b .reshape
|
||||||
# Fedora customization patches
|
# Fedora customization patches
|
||||||
%patch97 -p1 -b .udev
|
%patch97 -p1 -b .udev
|
||||||
%patch98 -p1 -b .static
|
%patch98 -p1 -b .static
|
||||||
@ -128,6 +130,13 @@ fi
|
|||||||
%{_initrddir}/*
|
%{_initrddir}/*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Jun 25 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-3
|
||||||
|
- Fix problem where reshape of RAID volume is broken after trying to
|
||||||
|
stop all MD devices.
|
||||||
|
- Enhance raid-check to allow the admin to specify the max number of
|
||||||
|
concurrent arrays to be checked at any given time.
|
||||||
|
- Resolves bz830177, bz820124
|
||||||
|
|
||||||
* Wed Jun 13 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-2
|
* Wed Jun 13 2012 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.2.5-2
|
||||||
- Fix uninstall script to remove dangling symlink to
|
- Fix uninstall script to remove dangling symlink to
|
||||||
mdmonitor-takeover.service, if the mdadm package is uninstalled from
|
mdmonitor-takeover.service, if the mdadm package is uninstalled from
|
||||||
|
37
raid-check
37
raid-check
@ -11,6 +11,25 @@
|
|||||||
[ -f /etc/sysconfig/raid-check ] || exit 0
|
[ -f /etc/sysconfig/raid-check ] || exit 0
|
||||||
. /etc/sysconfig/raid-check
|
. /etc/sysconfig/raid-check
|
||||||
|
|
||||||
|
# Wait until no more than arg1 arrays in arg2 list are busy
|
||||||
|
waitbusy() {
|
||||||
|
local threshold=$(($1 + 1))
|
||||||
|
local dev_list="$2"
|
||||||
|
while true
|
||||||
|
do
|
||||||
|
local busy=0
|
||||||
|
local dev=""
|
||||||
|
for dev in $dev_list; do
|
||||||
|
local sync_action=`cat /sys/block/$dev/md/sync_action`
|
||||||
|
if [ "$sync_action" != "idle" ]; then
|
||||||
|
let busy++
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
[ $busy -lt $threshold ] && break
|
||||||
|
sleep 60
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
[ "$ENABLED" != "yes" ] && exit 0
|
[ "$ENABLED" != "yes" ] && exit 0
|
||||||
|
|
||||||
case "$CHECK" in
|
case "$CHECK" in
|
||||||
@ -70,6 +89,10 @@ done
|
|||||||
[ -z "$dev_list" ] && exit 0
|
[ -z "$dev_list" ] && exit 0
|
||||||
|
|
||||||
for dev in $dev_list; do
|
for dev in $dev_list; do
|
||||||
|
#Only run $MAXCONCURRENT checks at a time
|
||||||
|
if [ -n "$MAXCONCURRENT" ]; then
|
||||||
|
waitbusy $((MAXCONCURRENT - 1)) "$dev_list"
|
||||||
|
fi
|
||||||
echo "${check[$dev]}" > /sys/block/$dev/md/sync_action
|
echo "${check[$dev]}" > /sys/block/$dev/md/sync_action
|
||||||
|
|
||||||
resync_pid=""
|
resync_pid=""
|
||||||
@ -86,18 +109,8 @@ for dev in $dev_list; do
|
|||||||
done
|
done
|
||||||
[ -z "$check_list" ] && exit 0
|
[ -z "$check_list" ] && exit 0
|
||||||
|
|
||||||
checking=1
|
waitbusy 0 "$check_list"
|
||||||
while [ $checking -ne 0 ]
|
|
||||||
do
|
|
||||||
sleep 60
|
|
||||||
checking=0
|
|
||||||
for dev in $check_list; do
|
|
||||||
sync_action=`cat /sys/block/$dev/md/sync_action`
|
|
||||||
if [ "$sync_action" != "idle" ]; then
|
|
||||||
checking=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done
|
|
||||||
for dev in $check_list; do
|
for dev in $check_list; do
|
||||||
mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
|
mismatch_cnt=`cat /sys/block/$dev/md/mismatch_cnt`
|
||||||
# Due to the fact that raid1/10 writes in the kernel are unbuffered,
|
# Due to the fact that raid1/10 writes in the kernel are unbuffered,
|
||||||
|
Loading…
Reference in New Issue
Block a user