Backport various fixes for mdadm-3.4 from RHEL

Resolves: bz#1303380 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
2016-08-12 14:21:31 -04:00 · 2016-08-12 14:21:31 -04:00 · 6e2d10d9a3
commit 6e2d10d9a3
parent 79d2abf1e9
9 changed files with 510 additions and 2 deletions
--- a/mdadm-3.4-Grow_continue_command-remove-dead-code.patch
+++ b/mdadm-3.4-Grow_continue_command-remove-dead-code.patch
@ -0,0 +1,51 @@
 commit 12add44564f195878c3e346e4bbae845dec67db3
 Author: Jes Sorensen <Jes.Sorensen@redhat.com>
 Date:   Fri Mar 4 16:30:22 2016 -0500
    Grow: Grow_continue_command() remove dead code
    All cases where fd2 is used are completed with a close(fd2), so there
    is no need to set fd2 = -1 or check for it before exiting.
    Reviewed-by: NeilBrown <neilb@suse.com>
    Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 diff --git a/Grow.c b/Grow.c
 index c4f417e..c4af5c0 100755
 --- a/Grow.c
 +++ b/Grow.c
@@ -4752,7 +4752,7 @@ int Grow_continue_command(char *devname, int fd,
 	struct mdinfo *cc = NULL;
 	struct mdstat_ent *mdstat = NULL;
 	int cfd = -1;
 -	int fd2 = -1;
 +	int fd2;
 	dprintf("Grow continue from command line called for %s\n",
 		devname);
@@ -4796,8 +4796,6 @@ int Grow_continue_command(char *devname, int fd,
 				continue;
 			err = st->ss->load_super(st, fd2, NULL);
 			close(fd2);
 -			/* invalidate fd2 to avoid possible double close() */
 -			fd2 = -1;
 			if (err)
 				continue;
 			break;
@@ -4894,7 +4892,6 @@ int Grow_continue_command(char *devname, int fd,
 		sysfs_init(content, fd2, mdstat->devnm);
 		close(fd2);
 -		fd2 = -1;
 		/* start mdmon in case it is not running
 		 */
@@ -4924,8 +4921,6 @@ int Grow_continue_command(char *devname, int fd,
 	ret_val = Grow_continue(fd, st, content, backup_file, 1, 0);
 Grow_continue_command_exit:
 -	if (fd2 > -1)
 -		close(fd2);
 	if (cfd > -1)
 		close(cfd);
 	st->ss->free_super(st);
--- a/mdadm-3.4-IMSM-retry-reading-sync_completed-during-reshape.patch
+++ b/mdadm-3.4-IMSM-retry-reading-sync_completed-during-reshape.patch
@ -0,0 +1,56 @@
 From df2647fa5bbe84960dae11531e34bafef549b8ff Mon Sep 17 00:00:00 2001
 From: Pawel Baldysiak <pawel.baldysiak@intel.com>
 Date: Tue, 17 May 2016 13:24:41 +0200
 Subject: [PATCH] IMSM: retry reading sync_completed during reshape
 The sync_completed after restarting a reshape
 (for example - after reboot) is set to "delayed" until
 mdmon changes the state. Mdadm does not wait for that change with
 old kernels. If this condition occurs - it exits and reshape
 is not continuing. This patch adds retry of reading sync_completed
 with a delay. It gives time for mdmon to change the "delayed" state.
 Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 super-intel.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
 diff --git a/super-intel.c b/super-intel.c
 index ba3ee48..7e2860c 100644
 --- a/super-intel.c
 +++ b/super-intel.c
@@ -10378,6 +10378,7 @@ exit_imsm_reshape_super:
 int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 {
 	int fd = sysfs_get_fd(sra, NULL, "sync_completed");
 +	int retry = 3;
 	unsigned long long completed;
 	/* to_complete : new sync_max position */
 	unsigned long long to_complete = sra->reshape_progress;
@@ -10388,11 +10389,17 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 		return 1;
 	}
 -	if (sysfs_fd_get_ll(fd, &completed) < 0) {
 -		dprintf("cannot read reshape_position (no reshape in progres)\n");
 -		close(fd);
 -		return 1;
 -	}
 +	do {
 +		if (sysfs_fd_get_ll(fd, &completed) < 0) {
 +			if (!retry) {
 +				dprintf("cannot read reshape_position (no reshape in progres)\n");
 +				close(fd);
 +				return 1;
 +			}
 +			usleep(30000);
 +		} else
 +			break;
 +	} while (retry--);
 	if (completed > position_to_set) {
 		dprintf("wrong next position to set %llu (%llu)\n",
 -- 
 2.5.5
--- a/mdadm-3.4-The-sys_name-array-in-the-mdinfo-structure-is-20-byt.patch
+++ b/mdadm-3.4-The-sys_name-array-in-the-mdinfo-structure-is-20-byt.patch
@ -0,0 +1,31 @@
 From 6e6e98746dba7e900f23e92bbb0da01fe7a169da Mon Sep 17 00:00:00 2001
 From: Nikhil Kshirsagar <nkshirsa@redhat.com>
 Date: Fri, 10 Jun 2016 08:50:10 +0530
 Subject: [PATCH] The sys_name array in the mdinfo structure is 20 bytes of
 storage.
 Increasing the size of this array to 32 bytes to handle cases with
 longer device names.
 Signed-off-by: Nikhil Kshirsagar <nkshirsa@redhat.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 mdadm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/mdadm.h b/mdadm.h
 index 3d6c638..1fd38a3 100755
 --- a/mdadm.h
 +++ b/mdadm.h
@@ -289,7 +289,7 @@ struct mdinfo {
 	int container_enough; /* flag external handlers can set to
 			       * indicate that subarrays have not enough (-1),
 			       * enough to start (0), or all expected disks (1) */
 -	char		sys_name[20];
 +	char		sys_name[32];
 	struct mdinfo *devs;
 	struct mdinfo *next;
 -- 
 2.5.5
--- a/mdadm-3.4-check-reshape_active-more-times-before-Grow_continue.patch
+++ b/mdadm-3.4-check-reshape_active-more-times-before-Grow_continue.patch
@ -0,0 +1,106 @@
 commit 8800f85381d0cd9689dee62bbbdafdb359100389
 Author: Xiao Ni <xni@redhat.com>
 Date:   Thu Jun 16 09:41:02 2016 +0800
    MDADM:Check mdinfo->reshape_active more times before calling Grow_continue
    When reshaping a 3 drives raid5 to 4 drives raid5, there is a chance that
    it can't start the reshape. If the disks are not enough to have spaces for
    relocating the data_offset, it needs to call start_reshape and then run
    mdadm --grow --continue by systemd. But mdadm --grow --continue fails
    because it checks that info->reshape_active is 0.
    The info->reshape_active is got from the superblock of underlying devices.
    Function start_reshape write reshape to /sys/../sync_action. Before writing
    latest superblock to underlying devices, mdadm --grow --continue is called.
    There is a chance info->reshape_active is 0. We should wait for superblock
    updating more time before calling Grow_continue.
    Signed-off-by: Xiao Ni <xni@redhat.com>
    Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 diff --git a/Grow.c b/Grow.c
 index f184d9c..628f0e7 100755
 --- a/Grow.c
 +++ b/Grow.c
@@ -4788,6 +4788,7 @@ int Grow_continue_command(char *devname, int fd,
 	dprintf("Grow continue is run for ");
 	if (st->ss->external == 0) {
 		int d;
 +		int cnt = 5;
 		dprintf_cont("native array (%s)\n", devname);
 		if (ioctl(fd, GET_ARRAY_INFO, &array.array) < 0) {
 			pr_err("%s is not an active md array - aborting\n", devname);
@@ -4799,36 +4800,42 @@ int Grow_continue_command(char *devname, int fd,
 		 * FIXME we should really get what we need from
 		 * sysfs
 		 */
 -		for (d = 0; d < MAX_DISKS; d++) {
 -			mdu_disk_info_t disk;
 -			char *dv;
 -			int err;
 -			disk.number = d;
 -			if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
 -				continue;
 -			if (disk.major == 0 && disk.minor == 0)
 -				continue;
 -			if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
 -				continue;
 -			dv = map_dev(disk.major, disk.minor, 1);
 -			if (!dv)
 -				continue;
 -			fd2 = dev_open(dv, O_RDONLY);
 -			if (fd2 < 0)
 -				continue;
 -			err = st->ss->load_super(st, fd2, NULL);
 -			close(fd2);
 -			if (err)
 -				continue;
 -			break;
 -		}
 -		if (d == MAX_DISKS) {
 -			pr_err("Unable to load metadata for %s\n",
 -			       devname);
 -			ret_val = 1;
 -			goto Grow_continue_command_exit;
 -		}
 -		st->ss->getinfo_super(st, content, NULL);
 +		do {
 +			for (d = 0; d < MAX_DISKS; d++) {
 +				mdu_disk_info_t disk;
 +				char *dv;
 +				int err;
 +				disk.number = d;
 +				if (ioctl(fd, GET_DISK_INFO, &disk) < 0)
 +					continue;
 +				if (disk.major == 0 && disk.minor == 0)
 +					continue;
 +				if ((disk.state & (1 << MD_DISK_ACTIVE)) == 0)
 +					continue;
 +				dv = map_dev(disk.major, disk.minor, 1);
 +				if (!dv)
 +					continue;
 +				fd2 = dev_open(dv, O_RDONLY);
 +				if (fd2 < 0)
 +					continue;
 +				err = st->ss->load_super(st, fd2, NULL);
 +				close(fd2);
 +				if (err)
 +					continue;
 +				break;
 +			}
 +			if (d == MAX_DISKS) {
 +				pr_err("Unable to load metadata for %s\n",
 +				       devname);
 +				ret_val = 1;
 +				goto Grow_continue_command_exit;
 +			}
 +			st->ss->getinfo_super(st, content, NULL);
 +			if (!content->reshape_active)
 +				sleep(3);
 +			else
 +				break;
 +		} while (cnt-- > 0);
 	} else {
 		char *container;
--- a/mdadm-3.4-imsm-add-handling-of-sync_action-is-equal-to-idle.patch
+++ b/mdadm-3.4-imsm-add-handling-of-sync_action-is-equal-to-idle.patch
@ -0,0 +1,42 @@
 From b2be2b628b6305712c8df0b3a20ddddc0ac410fb Mon Sep 17 00:00:00 2001
 From: Alexey Obitotskiy <aleksey.obitotskiy@intel.com>
 Date: Thu, 16 Jun 2016 11:31:36 +0200
 Subject: [PATCH 1/2] imsm: add handling of sync_action is equal to 'idle'
 After resync is stopped sync_action value become 'idle'.
 We treat this case as normal termination of waiting, not as error.
 Signed-off-by: Alexey Obitotskiy <aleksey.obitotskiy@intel.com>
 Reviewed-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 super-intel.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
 diff --git a/super-intel.c b/super-intel.c
 index 7e2860c..7950bef 100644
 --- a/super-intel.c
 +++ b/super-intel.c
@@ -10423,6 +10423,8 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 		if (sysfs_get_str(sra, NULL, "sync_action",
 				  action, 20) > 0 &&
 				strncmp(action, "reshape", 7) != 0) {
 +			if (strncmp(action, "idle", 4) == 0)
 +				break;
 			close(fd);
 			return -1;
 		}
@@ -10432,9 +10434,9 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 			return 1;
 		}
 	} while (completed < position_to_set);
 +
 	close(fd);
 	return 0;
 -
 }
 /*******************************************************************************
 -- 
 2.5.5
--- a/mdadm-3.4-imsm-properly-handle-values-of-sync_completed.patch
+++ b/mdadm-3.4-imsm-properly-handle-values-of-sync_completed.patch
@ -0,0 +1,89 @@
 From 0febb20c458a488460eadade74a6c283aadaf96a Mon Sep 17 00:00:00 2001
 From: Alexey Obitotskiy <aleksey.obitotskiy@intel.com>
 Date: Thu, 16 Jun 2016 11:31:37 +0200
 Subject: [PATCH 2/2] imsm: properly handle values of sync_completed
 The sync_completed can be set to such values:
 - two numbers of processed sectors and total during synchronization,
 separated with '/';
 - 'none' if synchronization process is stopped;
 - 'delayed' if synchronization process is delayed.
 Handle value of sync_completed not only as numbers but
 also check for 'none' and 'delayed'.
 Signed-off-by: Alexey Obitotskiy <aleksey.obitotskiy@intel.com>
 Reviewed-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 super-intel.c | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)
 diff --git a/super-intel.c b/super-intel.c
 index 7950bef..92817e9 100644
 --- a/super-intel.c
 +++ b/super-intel.c
@@ -10363,6 +10363,33 @@ exit_imsm_reshape_super:
 	return ret_val;
 }
 +#define COMPLETED_OK		0
 +#define COMPLETED_NONE		1
 +#define COMPLETED_DELAYED	2
 +
 +static int read_completed(int fd, unsigned long long *val)
 +{
 +	int ret;
 +	char buf[50];
 +
 +	ret = sysfs_fd_get_str(fd, buf, 50);
 +	if (ret < 0)
 +		return ret;
 +
 +	ret = COMPLETED_OK;
 +	if (strncmp(buf, "none", 4) == 0) {
 +		ret = COMPLETED_NONE;
 +	} else if (strncmp(buf, "delayed", 7) == 0) {
 +		ret = COMPLETED_DELAYED;
 +	} else {
 +		char *ep;
 +		*val = strtoull(buf, &ep, 0);
 +		if (ep == buf || (*ep != 0 && *ep != '\n' && *ep != ' '))
 +			ret = -1;
 +	}
 +	return ret;
 +}
 +
 /*******************************************************************************
  * Function:	wait_for_reshape_imsm
  * Description:	Function writes new sync_max value and waits until
@@ -10417,8 +10444,10 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 	}
 	do {
 +		int rc;
 		char action[20];
 		int timeout = 3000;
 +
 		sysfs_wait(fd, &timeout);
 		if (sysfs_get_str(sra, NULL, "sync_action",
 				  action, 20) > 0 &&
@@ -10428,11 +10457,14 @@ int wait_for_reshape_imsm(struct mdinfo *sra, int ndata)
 			close(fd);
 			return -1;
 		}
 -		if (sysfs_fd_get_ll(fd, &completed) < 0) {
 +
 +		rc = read_completed(fd, &completed);
 +		if (rc < 0) {
 			dprintf("cannot read reshape_position (in loop)\n");
 			close(fd);
 			return 1;
 -		}
 +		} else if (rc == COMPLETED_NONE)
 +			break;
 	} while (completed < position_to_set);
 	close(fd);
 -- 
 2.5.5
--- a/mdadm-3.4-super-intel-ensure-suspended-region-is-removed-when-.patch
+++ b/mdadm-3.4-super-intel-ensure-suspended-region-is-removed-when-.patch
@ -0,0 +1,63 @@
 From 942e1cdb4a6a5be02672bc686169c679e775c2be Mon Sep 17 00:00:00 2001
 From: NeilBrown <neilb@suse.com>
 Date: Thu, 18 Feb 2016 15:53:32 +1100
 Subject: [PATCH] super-intel: ensure suspended region is removed when reshape
 completes.
 A recent commit removed a call to abort_reshape() when IMSM reshape
 completed.  An unanticipated result of this is that the suspended
 region is not cleared as it should be.
 So after a reshape, a region of the array will cause all IO to block.
 Re-instate the required updates to suspend_{lo,hi} copied from
 abort_reshape().
 This is caught (sometimes) by the test suite.
 Also fix a couple of typos found while exploring the code.
 Reported-by: Ken Moffat <zarniwhoop@ntlworld.com>
 Cc: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
 Fixes: 2139b03c2080 ("imsm: don't call abort_reshape() in imsm_manage_reshape()")
 Signed-off-by: NeilBrown <neilb@suse.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 super-intel.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
 diff --git a/super-intel.c b/super-intel.c
 index 90b7b6d..ff0506d 100644
 --- a/super-intel.c
 +++ b/super-intel.c
@@ -10465,7 +10465,7 @@ int check_degradation_change(struct mdinfo *info,
  * Function:	imsm_manage_reshape
  * Description:	Function finds array under reshape and it manages reshape
  *		process. It creates stripes backups (if required) and sets
 - *		checheckpoits.
 + *		checkpoints.
  * Parameters:
  *	afd		: Backup handle (nattive) - not used
  *	sra		: general array info
@@ -10595,7 +10595,7 @@ static int imsm_manage_reshape(
 		start = current_position * 512;
 -		/* allign reading start to old geometry */
 +		/* align reading start to old geometry */
 		start_buf_shift = start % old_data_stripe_length;
 		start_src = start - start_buf_shift;
@@ -10700,6 +10700,10 @@ static int imsm_manage_reshape(
 	ret_val = 1;
 abort:
 	free(buf);
 +	/* See Grow.c: abort_reshape() for further explanation */
 +	sysfs_set_num(sra, NULL, "suspend_lo", 0x7FFFFFFFFFFFFFFFULL);
 +	sysfs_set_num(sra, NULL, "suspend_hi", 0);
 +	sysfs_set_num(sra, NULL, "suspend_lo", 0);
 	return ret_val;
 }
 -- 
 2.5.0
--- a/mdadm-3.4-super1-Clear-memory-allocated-for-superblock-bitmap-.patch
+++ b/mdadm-3.4-super1-Clear-memory-allocated-for-superblock-bitmap-.patch
@ -0,0 +1,36 @@
 From 1dcee1c9cbcf9592275914706b76b1931490092c Mon Sep 17 00:00:00 2001
 From: Jes Sorensen <Jes.Sorensen@redhat.com>
 Date: Wed, 6 Apr 2016 16:13:59 -0400
 Subject: [PATCH] super1: Clear memory allocated for superblock + bitmap before
 use
 load_super1() did not clear memory allocated for the superblock +
 bitmap. This causes issues if the superblock does not contain a bitmap
 as later checks of bitmap features would rely on the bits being
 cleared.
 This bug has been around for a long time, but was only exposed in
 mdadm-3.4 with the introduction of the clustering code.
 Reported-by: Jan Stodola <jstodola@redhat.com>
 Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
 ---
 super1.c | 2 ++
 1 file changed, 2 insertions(+)
 diff --git a/super1.c b/super1.c
 index d6f3c93..8d5543f 100644
 --- a/super1.c
 +++ b/super1.c
@@ -2016,6 +2016,8 @@ static int load_super1(struct supertype *st, int fd, char *devname)
 		return 1;
 	}
 +	memset(super, 0, SUPER1_SIZE);
 +
 	if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
 		if (devname)
 			pr_err("Cannot read superblock on %s\n",
 -- 
 2.5.5
--- a/mdadm.spec
+++ b/mdadm.spec
@ -1,7 +1,7 @@
 Summary:     The mdadm program controls Linux md devices (software RAID arrays)
 Name:        mdadm
 Version:     3.4
-Release:     1%{?dist}
+Release:     2%{?dist}
 Source:      http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
 Source1:     mdmonitor.init
 Source2:     raid-check
@ -12,6 +12,15 @@ Source6:     mdmonitor.service
 Source7:     mdadm.conf
 Source8:     mdadm_event.conf
 Patch1:      mdadm-3.4.1-fix-some-type-comparison.patch
 Patch2:      mdadm-3.4-super-intel-ensure-suspended-region-is-removed-when-.patch
 Patch3:      mdadm-3.4-super1-Clear-memory-allocated-for-superblock-bitmap-.patch
 Patch4:      mdadm-3.4-IMSM-retry-reading-sync_completed-during-reshape.patch
 Patch5:      mdadm-3.4-imsm-add-handling-of-sync_action-is-equal-to-idle.patch
 Patch6:      mdadm-3.4-imsm-properly-handle-values-of-sync_completed.patch
 Patch7:      mdadm-3.4-The-sys_name-array-in-the-mdinfo-structure-is-20-byt.patch
 Patch8:      mdadm-3.4-Grow_continue_command-remove-dead-code.patch
 Patch9:      mdadm-3.4-check-reshape_active-more-times-before-Grow_continue.patch
 # Fedora customization patches
 Patch97:     mdadm-3.3-udev.patch
 Patch98:     mdadm-2.5.2-static.patch
@ -28,6 +37,8 @@ Requires(preun): systemd-units
 Requires(postun): systemd-units coreutils
 Requires: libreport-filesystem
 %define _hardened_build 1
 %description 
 The mdadm program is used to create, manage, and monitor Linux MD (software
 RAID) devices.  As such, it provides similar functionality to the raidtools
@ -38,12 +49,22 @@ file can be used to help with some common tasks.
 %prep
 %setup -q
 %patch1 -p1 -b .comparison
 %patch2  -p1 -b .stop-reshape
 %patch3  -p1 -b .clear
 %patch4  -p1 -b .retry
 %patch5  -p1 -b .syncaction
 %patch6  -p1 -b .synccompleted
 %patch7  -p1 -b .sysname
 %patch8  -p1 -b .dead
 %patch9  -p1 -b .before
 # Fedora customization patches
 %patch97 -p1 -b .udev
 %patch98 -p1 -b .static
 %build
-make %{?_smp_mflags} CXFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}" mdadm mdmon
+make %{?_smp_mflags} CXFLAGS="$RPM_OPT_FLAGS" LDFLAGS="$RPM_LD_FLAGS" SYSCONFDIR="%{_sysconfdir}" mdadm mdmon
 %install
 rm -rf %{buildroot}
@ -102,6 +123,19 @@ rm -rf %{buildroot}
 /etc/libreport/events.d/*
 %changelog
 * Fri Aug 12 2016 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.4-2
 - Fix i686 build error
 - Fix problem where it was not possible to stop an IMSM array during reshape
 - Fix Degraded Raid1 array becomes inactive after rebooting
 - Fix problem with raid0 arrays not being detected by Anaconda due to it
  setting MALLOC_PERTURB_
 - Fix problem with reshaping IMSM arrays, where a new reshape could be
  launched before the first reshape had fully completed, leading to
  unexpected results.
 - Fix problem with mdadm large device names overflowing an internal buffer
 - Fix problem about reshape stuck at beginning
 - Resolves bz1303380
 * Fri Aug 12 2016 Jes Sorensen <Jes.Sorensen@redhat.com> - 3.4-1
 - Upgrade to mdadm-3.4
 - Resolves bz1303380