import mdadm-4.1-8.el8

This commit is contained in:
CentOS Sources 2019-08-05 10:47:21 -04:00 committed by Stepan Oksanichenko
commit 2274dc32c8
37 changed files with 3688 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
SOURCES/mdadm-4.1.tar.xz

1
.mdadm.metadata Normal file
View File

@ -0,0 +1 @@
4bbbd02674ac67dc9773f41aace7677aa5374c1c SOURCES/mdadm-4.1.tar.xz

View File

@ -0,0 +1,39 @@
From 0833f9c3dbaaee202b92ea956f9e2decc7b9593a Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@profitbricks.com>
Date: Tue, 6 Nov 2018 15:27:42 +0100
Subject: [RHEL7.7 PATCH 01/24] Assemble: keep MD_DISK_FAILFAST and
MD_DISK_WRITEMOSTLY flag
Before updating superblock of slave disks, desired_state value
is set for the target state of the slave disks. But it forgets
to check MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY flags. Then
start_arrays() calls the ADD_NEW_DISK ioctl and passes the state
without MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY.
Currently it does not cause any problem because the kernel does not
care about the MD_DISK_FAILFAST or MD_DISK_WRITEMOSTLY flags.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Assemble.c b/Assemble.c
index a79466c..f39c9e1 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1704,6 +1704,9 @@ try_again:
else
desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
+ desired_state |= devices[j].i.disk.state & ((1<<MD_DISK_FAILFAST) |
+ (1<<MD_DISK_WRITEMOSTLY));
+
if (!devices[j].uptodate)
continue;
--
2.7.5

View File

@ -0,0 +1,77 @@
From 6b6112842030309c297a521918d1a2e982426fa3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 9 Nov 2018 17:12:33 +1100
Subject: [RHEL7.7 PATCH 02/24] Document PART-POLICY lines
PART-POLICY has been accepted in mdadm.conf since the same
time that POLICY was accepted, but it was never documented.
So add the missing documentation.
Also fix a bug which would have stopped it from working if
anyone had ever tried to use it.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.conf.5 | 24 +++++++++++++++++++++++-
policy.c | 2 +-
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 18512cb..47c962a 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -501,7 +501,7 @@ To update hot plug configuration it is necessary to execute
.B mdadm \-\-udev\-rules
command after changing the config file
-Key words used in the
+Keywords used in the
.I POLICY
line and supported values are:
@@ -565,6 +565,28 @@ be automatically added to that array (or it's container)
as above and the disk will become a spare in remaining cases
.RE
+.TP
+.B PART-POLICY
+This is similar to
+.B POLICY
+and accepts the same keyword assignments. It allows a consistent set
+of policies to applied to each of the partitions of a device.
+
+A
+.B PART-POLICY
+line should set
+.I type=disk
+and identify the path to one or more disk devices. Each partition on
+these disks will be treated according to the
+.I action=
+setting from this line. If a
+.I domain
+is set in the line, then the domain associated with each patition will
+be based on the domain, but with
+.RB \(dq -part N\(dq
+appended, when N is the partition number for the partition that was
+found.
+
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
.br
diff --git a/policy.c b/policy.c
index c0d18a7..258f393 100644
--- a/policy.c
+++ b/policy.c
@@ -300,7 +300,7 @@ static int path_has_part(char *path, char **part)
l--;
if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
return 0;
- *part = path+l-4;
+ *part = path+l-5;
return 1;
}
--
2.7.5

View File

@ -0,0 +1,334 @@
From cd72f9d114da206baa01fd56ff2d8ffcc08f3239 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 9 Nov 2018 17:12:33 +1100
Subject: [RHEL7.7 PATCH 03/24] policy: support devices with multiple paths.
As new releases of Linux sometimes change the name of
a path, some distros keep "legacy" names as well. This
is useful, but confuses mdadm which assumes each device has
precisely one path.
So change this assumption: allow a disk to have several
paths, and allow any to match when looking for a policy
which matches a disk.
Reported-and-tested-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Incremental.c | 5 +-
mdadm.h | 2 +-
policy.c | 163 ++++++++++++++++++++++++++++++++--------------------------
3 files changed, 95 insertions(+), 75 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index a4ff7d4..d4d3c35 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -1080,6 +1080,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct supertype *st2 = NULL;
char *devname = NULL;
unsigned long long devsectors;
+ char *pathlist[2];
if (de->d_ino == 0 || de->d_name[0] == '.' ||
(de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
@@ -1094,7 +1095,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
/* This is a partition - skip it */
goto next;
- pol2 = path_policy(de->d_name, type_disk);
+ pathlist[0] = de->d_name;
+ pathlist[1] = NULL;
+ pol2 = path_policy(pathlist, type_disk);
domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
diff --git a/mdadm.h b/mdadm.h
index 387e681..705bd9b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1247,7 +1247,7 @@ extern void policyline(char *line, char *type);
extern void policy_add(char *type, ...);
extern void policy_free(void);
-extern struct dev_policy *path_policy(char *path, char *type);
+extern struct dev_policy *path_policy(char **paths, char *type);
extern struct dev_policy *disk_policy(struct mdinfo *disk);
extern struct dev_policy *devid_policy(int devid);
extern void dev_policy_free(struct dev_policy *p);
diff --git a/policy.c b/policy.c
index 258f393..fa67d55 100644
--- a/policy.c
+++ b/policy.c
@@ -189,15 +189,17 @@ struct dev_policy *pol_find(struct dev_policy *pol, char *name)
return pol;
}
-static char *disk_path(struct mdinfo *disk)
+static char **disk_paths(struct mdinfo *disk)
{
struct stat stb;
int prefix_len;
DIR *by_path;
char symlink[PATH_MAX] = "/dev/disk/by-path/";
- char nm[PATH_MAX];
+ char **paths;
+ int cnt = 0;
struct dirent *ent;
- int rv;
+
+ paths = xmalloc(sizeof(*paths) * (cnt+1));
by_path = opendir(symlink);
if (by_path) {
@@ -214,22 +216,13 @@ static char *disk_path(struct mdinfo *disk)
continue;
if (stb.st_rdev != makedev(disk->disk.major, disk->disk.minor))
continue;
- closedir(by_path);
- return xstrdup(ent->d_name);
+ paths[cnt++] = xstrdup(ent->d_name);
+ paths = xrealloc(paths, sizeof(*paths) * (cnt+1));
}
closedir(by_path);
}
- /* A NULL path isn't really acceptable - use the devname.. */
- sprintf(symlink, "/sys/dev/block/%d:%d", disk->disk.major, disk->disk.minor);
- rv = readlink(symlink, nm, sizeof(nm)-1);
- if (rv > 0) {
- char *dname;
- nm[rv] = 0;
- dname = strrchr(nm, '/');
- if (dname)
- return xstrdup(dname + 1);
- }
- return xstrdup("unknown");
+ paths[cnt] = NULL;
+ return paths;
}
char type_part[] = "part";
@@ -246,18 +239,53 @@ static char *disk_type(struct mdinfo *disk)
return type_disk;
}
-static int pol_match(struct rule *rule, char *path, char *type)
+static int path_has_part(char *path, char **part)
+{
+ /* check if path ends with "-partNN" and
+ * if it does, place a pointer to "-pathNN"
+ * in 'part'.
+ */
+ int l;
+ if (!path)
+ return 0;
+ l = strlen(path);
+ while (l > 1 && isdigit(path[l-1]))
+ l--;
+ if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
+ return 0;
+ *part = path+l-5;
+ return 1;
+}
+
+static int pol_match(struct rule *rule, char **paths, char *type, char **part)
{
- /* check if this rule matches on path and type */
+ /* Check if this rule matches on any path and type.
+ * If 'part' is not NULL, then 'path' must end in -partN, which
+ * we ignore for matching, and return in *part on success.
+ */
int pathok = 0; /* 0 == no path, 1 == match, -1 == no match yet */
int typeok = 0;
- while (rule) {
+ for (; rule; rule = rule->next) {
if (rule->name == rule_path) {
+ char *p;
+ int i;
if (pathok == 0)
pathok = -1;
- if (path && fnmatch(rule->value, path, 0) == 0)
- pathok = 1;
+ if (!paths)
+ continue;
+ for (i = 0; paths[i]; i++) {
+ if (part) {
+ if (!path_has_part(paths[i], &p))
+ continue;
+ *p = '\0';
+ *part = p+1;
+ }
+ if (fnmatch(rule->value, paths[i], 0) == 0)
+ pathok = 1;
+ if (part)
+ *p = '-';
+ }
}
if (rule->name == rule_type) {
if (typeok == 0)
@@ -265,7 +293,6 @@ static int pol_match(struct rule *rule, char *path, char *type)
if (type && strcmp(rule->value, type) == 0)
typeok = 1;
}
- rule = rule->next;
}
return pathok >= 0 && typeok >= 0;
}
@@ -286,24 +313,6 @@ static void pol_merge(struct dev_policy **pol, struct rule *rule)
pol_new(pol, r->name, r->value, metadata);
}
-static int path_has_part(char *path, char **part)
-{
- /* check if path ends with "-partNN" and
- * if it does, place a pointer to "-pathNN"
- * in 'part'.
- */
- int l;
- if (!path)
- return 0;
- l = strlen(path);
- while (l > 1 && isdigit(path[l-1]))
- l--;
- if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
- return 0;
- *part = path+l-5;
- return 1;
-}
-
static void pol_merge_part(struct dev_policy **pol, struct rule *rule, char *part)
{
/* copy any name assignments from rule into pol, appending
@@ -352,7 +361,7 @@ static int config_rules_has_path = 0;
* path_policy() gathers policy information for the
* disk described in the given a 'path' and a 'type'.
*/
-struct dev_policy *path_policy(char *path, char *type)
+struct dev_policy *path_policy(char **paths, char *type)
{
struct pol_rule *rules;
struct dev_policy *pol = NULL;
@@ -361,27 +370,24 @@ struct dev_policy *path_policy(char *path, char *type)
rules = config_rules;
while (rules) {
- char *part;
+ char *part = NULL;
if (rules->type == rule_policy)
- if (pol_match(rules->rule, path, type))
+ if (pol_match(rules->rule, paths, type, NULL))
pol_merge(&pol, rules->rule);
if (rules->type == rule_part && strcmp(type, type_part) == 0)
- if (path_has_part(path, &part)) {
- *part = 0;
- if (pol_match(rules->rule, path, type_disk))
- pol_merge_part(&pol, rules->rule, part+1);
- *part = '-';
- }
+ if (pol_match(rules->rule, paths, type_disk, &part))
+ pol_merge_part(&pol, rules->rule, part);
rules = rules->next;
}
/* Now add any metadata-specific internal knowledge
* about this path
*/
- for (i=0; path && superlist[i]; i++)
+ for (i=0; paths[0] && superlist[i]; i++)
if (superlist[i]->get_disk_controller_domain) {
const char *d =
- superlist[i]->get_disk_controller_domain(path);
+ superlist[i]->get_disk_controller_domain(
+ paths[0]);
if (d)
pol_new(&pol, pol_domain, d, superlist[i]->name);
}
@@ -400,22 +406,34 @@ void pol_add(struct dev_policy **pol,
pol_dedup(*pol);
}
+static void free_paths(char **paths)
+{
+ int i;
+
+ if (!paths)
+ return;
+
+ for (i = 0; paths[i]; i++)
+ free(paths[i]);
+ free(paths);
+}
+
/*
* disk_policy() gathers policy information for the
* disk described in the given mdinfo (disk.{major,minor}).
*/
struct dev_policy *disk_policy(struct mdinfo *disk)
{
- char *path = NULL;
+ char **paths = NULL;
char *type = disk_type(disk);
struct dev_policy *pol = NULL;
if (config_rules_has_path)
- path = disk_path(disk);
+ paths = disk_paths(disk);
- pol = path_policy(path, type);
+ pol = path_policy(paths, type);
- free(path);
+ free_paths(paths);
return pol;
}
@@ -756,27 +774,26 @@ int policy_check_path(struct mdinfo *disk, struct map_ent *array)
{
char path[PATH_MAX];
FILE *f = NULL;
- char *id_path = disk_path(disk);
- int rv;
+ char **id_paths = disk_paths(disk);
+ int i;
+ int rv = 0;
- if (!id_path)
- return 0;
+ for (i = 0; id_paths[i]; i++) {
+ snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_paths[i]);
+ f = fopen(path, "r");
+ if (!f)
+ continue;
- snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
- f = fopen(path, "r");
- if (!f) {
- free(id_path);
- return 0;
+ rv = fscanf(f, " %s %x:%x:%x:%x\n",
+ array->metadata,
+ array->uuid,
+ array->uuid+1,
+ array->uuid+2,
+ array->uuid+3);
+ fclose(f);
+ break;
}
-
- rv = fscanf(f, " %s %x:%x:%x:%x\n",
- array->metadata,
- array->uuid,
- array->uuid+1,
- array->uuid+2,
- array->uuid+3);
- fclose(f);
- free(id_path);
+ free_paths(id_paths);
return rv == 5;
}
--
2.7.5

View File

@ -0,0 +1,137 @@
From 4199d3c629c14866505923d19fa50017ee92d2e1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 5 Dec 2018 16:35:00 +1100
Subject: [RHEL7.7 PATCH 04/24] mdcheck: add systemd unit files to run mdcheck.
Having the mdcheck script is of no use if it is never run.
This patch adds systemd unit files so that it can easily
be run on the first Sunday of each month for 6 hours,
then on every subsequent morning until the check is
finished.
The units still need to be enabled with
systemctl enable mdcheck_start.timer
The timer will only actually be started when an array
which might need it becomes active.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 5 ++++-
systemd/mdcheck_continue.service | 18 ++++++++++++++++++
systemd/mdcheck_continue.timer | 13 +++++++++++++
systemd/mdcheck_start.service | 17 +++++++++++++++++
systemd/mdcheck_start.timer | 15 +++++++++++++++
5 files changed, 67 insertions(+), 1 deletion(-)
create mode 100644 systemd/mdcheck_continue.service
create mode 100644 systemd/mdcheck_continue.timer
create mode 100644 systemd/mdcheck_start.service
create mode 100644 systemd/mdcheck_start.timer
diff --git a/Makefile b/Makefile
index 2767ac6..afb62cc 100644
--- a/Makefile
+++ b/Makefile
@@ -276,7 +276,10 @@ install-udev: udev-md-raid-arrays.rules udev-md-raid-assembly.rules udev-md-raid
install-systemd: systemd/mdmon@.service
@for file in mdmon@.service mdmonitor.service mdadm-last-resort@.timer \
- mdadm-last-resort@.service mdadm-grow-continue@.service; \
+ mdadm-last-resort@.service mdadm-grow-continue@.service \
+ mdcheck_start.timer mdcheck_start.service \
+ mdcheck_continue.timer mdcheck_continue.service \
+ ; \
do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.2 && \
$(ECHO) $(INSTALL) -D -m 644 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
$(INSTALL) -D -m 644 .install.tmp.2 $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
new file mode 100644
index 0000000..592c607
--- /dev/null
+++ b/systemd/mdcheck_continue.service
@@ -0,0 +1,18 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing - continuation
+ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
+
+[Service]
+Type=oneshot
+Environment= MDADM_CHECK_DURATION='"6 hours"'
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=/usr/share/mdadm/mdcheck --continue --duration $MDADM_CHECK_DURATION
+
diff --git a/systemd/mdcheck_continue.timer b/systemd/mdcheck_continue.timer
new file mode 100644
index 0000000..3ccfd78
--- /dev/null
+++ b/systemd/mdcheck_continue.timer
@@ -0,0 +1,13 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing - continuation
+
+[Timer]
+OnCalendar= 1:05:00
+
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
new file mode 100644
index 0000000..812141b
--- /dev/null
+++ b/systemd/mdcheck_start.service
@@ -0,0 +1,17 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing
+Wants=mdcheck_continue.timer
+
+[Service]
+Type=oneshot
+Environment= MDADM_CHECK_DURATION='"6 hours"'
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=/usr/share/mdadm/mdcheck --duration $MDADM_CHECK_DURATION
diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer
new file mode 100644
index 0000000..6480736
--- /dev/null
+++ b/systemd/mdcheck_start.timer
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing
+
+[Timer]
+OnCalendar=Sun *-*-1..7 1:00:00
+
+[Install]
+WantedBy= mdmonitor.service
--
2.7.5

View File

@ -0,0 +1,83 @@
From 7cd7e91ab3de5aa75dc963cb08b0618c1885cf0d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 5 Dec 2018 16:35:00 +1100
Subject: [RHEL7.7 PATCH 05/24] Monitor: add system timer to run --oneshot
periodically
"mdadm --monitor --oneshot" can be used to get a warning
if there are any degraded arrays. It can be helpful to get
this warning periodically while the condition persists.
This patch adds a systemd service and timer which can
be enabled with
systemctl enable mdmonitor-oneshot.timer
and will then provide daily warnings.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 1 +
systemd/mdmonitor-oneshot.service | 15 +++++++++++++++
systemd/mdmonitor-oneshot.timer | 15 +++++++++++++++
3 files changed, 31 insertions(+)
create mode 100644 systemd/mdmonitor-oneshot.service
create mode 100644 systemd/mdmonitor-oneshot.timer
diff --git a/Makefile b/Makefile
index afb62cc..dfe00b0 100644
--- a/Makefile
+++ b/Makefile
@@ -279,6 +279,7 @@ install-systemd: systemd/mdmon@.service
mdadm-last-resort@.service mdadm-grow-continue@.service \
mdcheck_start.timer mdcheck_start.service \
mdcheck_continue.timer mdcheck_continue.service \
+ mdmonitor-oneshot.timer mdmonitor-oneshot.service \
; \
do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.2 && \
$(ECHO) $(INSTALL) -D -m 644 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
diff --git a/systemd/mdmonitor-oneshot.service b/systemd/mdmonitor-oneshot.service
new file mode 100644
index 0000000..fd469b1
--- /dev/null
+++ b/systemd/mdmonitor-oneshot.service
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Reminder for degraded MD arrays
+
+[Service]
+Environment= MDADM_MONITOR_ARGS=--scan
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=BINDIR/mdadm --monitor --oneshot $MDADM_MONITOR_ARGS
diff --git a/systemd/mdmonitor-oneshot.timer b/systemd/mdmonitor-oneshot.timer
new file mode 100644
index 0000000..cb54bda
--- /dev/null
+++ b/systemd/mdmonitor-oneshot.timer
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Reminder for degraded MD arrays
+
+[Timer]
+OnCalendar= 2:00:00
+
+[Install]
+WantedBy= mdmonitor.service
--
2.7.5

View File

@ -0,0 +1,83 @@
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Wed, 17 Oct 2018 12:11:41 +0200
Subject: [RHEL7.7 PATCH 06/24] imsm: update metadata correctly while raid10
double degradation
Mdmon calls end_migration() when map state changes from normal to
degraded. It is not valid because in raid 10 double degradation case
mdmon breaks checkpointing but array is still rebuilding.
In this case mdmon has to mark map as degraded and continues marking
recovery checkpoint in metadata. Migration can be finished only if newly
failed device is a rebuilding device.
Add catching double degraded to degraded transition. Migration is
finished but map state doesn't change, array is still degraded.
Update failed_disk_num correctly. If double degradation happens,
rebuild will start on the lowest slot, but this variable points
to the first failed slot. If a second failure happens during rebuild, this
variable shouldn't be updated until the rebuild is finished.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 6438987..d2035cc 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
set_imsm_ord_tbl_ent(map2, slot2,
idx | IMSM_ORD_REBUILD);
}
- if (map->failed_disk_num == 0xff)
+ if (map->failed_disk_num == 0xff ||
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
map->failed_disk_num = slot;
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
break;
}
if (is_rebuilding(dev)) {
- dprintf_cont("while rebuilding.");
+ dprintf_cont("while rebuilding ");
if (map->map_state != map_state) {
- dprintf_cont(" Map state change");
- end_migration(dev, super, map_state);
+ dprintf_cont("map state change ");
+ if (n == map->failed_disk_num) {
+ dprintf_cont("end migration");
+ end_migration(dev, super, map_state);
+ } else {
+ dprintf_cont("raid10 double degradation, map state change");
+ map->map_state = map_state;
+ }
super->updates_pending++;
- } else if (!rebuild_done) {
+ } else if (!rebuild_done)
break;
+ else if (n == map->failed_disk_num) {
+ /* r10 double degraded to degraded transition */
+ dprintf_cont("raid10 double degradation end migration");
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
}
/* check if recovery is really finished */
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf("Rebuild has not finished yet, state not changed");
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;
--
2.7.5

View File

@ -0,0 +1,43 @@
From 563ac108659980b3d1e226fe416254a86656235f Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Tue, 6 Nov 2018 16:20:17 +0100
Subject: [RHEL7.7 PATCH 07/24] Assemble: mask FAILFAST and WRITEMOSTLY flags
when finding the most recent device
If devices[].i.disk.state has MD_DISK_FAILFAST or MD_DISK_WRITEMOSTLY
flag, it cannot be the most recent device. Both flags should be masked
before checking the state.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/Assemble.c b/Assemble.c
index f39c9e1..9f75c68 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -578,6 +578,7 @@ static int load_devices(struct devs *devices, char *devmap,
struct supertype *tst;
int i;
int dfd;
+ int disk_state;
if (tmpdev->used != 1)
continue;
@@ -711,7 +712,9 @@ static int load_devices(struct devs *devices, char *devmap,
devices[devcnt].i.disk.major = major(stb.st_rdev);
devices[devcnt].i.disk.minor = minor(stb.st_rdev);
- if (devices[devcnt].i.disk.state == 6) {
+ disk_state = devices[devcnt].i.disk.state & ~((1<<MD_DISK_FAILFAST) |
+ (1<<MD_DISK_WRITEMOSTLY));
+ if (disk_state == ((1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC))) {
if (most_recent < 0 ||
devices[devcnt].i.events
> devices[most_recent].i.events) {
--
2.7.5

View File

@ -0,0 +1,34 @@
From 085df42259cba7863cd6ebe5cd0d8492ac5b869e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 6 Dec 2018 10:35:41 +1100
Subject: [RHEL7.7 PATCH 08/24] Grow: avoid overflow in compute_backup_blocks()
With a chunk size of 16Meg and data drive count of 8,
this calculation can easily overflow the 'int' type that
is used for the multiplications.
So force it to use "long" instead.
Reported-and-tested-by: Ed Spiridonov <edo.rus@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 4436a4d..76f82c0 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1196,7 +1196,8 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
/* Find GCD */
a = GCD(a, b);
/* LCM == product / GCD */
- blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
+ blocks = (unsigned long)(ochunk/512) * (unsigned long)(nchunk/512) *
+ odata * ndata / a;
return blocks;
}
--
2.7.5

View File

@ -0,0 +1,30 @@
From 76d505dec6c9f92564553596fc8350324be82463 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 6 Dec 2018 10:36:28 +1100
Subject: [RHEL7.7 PATCH 09/24] Grow: report correct new chunk size.
When using "--grow --chunk=" to change chunk
size, the old chunksize is reported instead of the new.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 76f82c0..363b209 100644
--- a/Grow.c
+++ b/Grow.c
@@ -3286,7 +3286,7 @@ static int reshape_array(char *container, int fd, char *devname,
goto release;
} else if (verbose >= 0)
printf("chunk size for %s set to %d\n",
- devname, array.chunk_size);
+ devname, info->new_chunk);
}
unfreeze(st);
return 0;
--
2.7.5

View File

@ -0,0 +1,31 @@
From 467e6a1b4ece8e552ee638dab7f44a4d235ece1a Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Fri, 7 Dec 2018 12:04:44 +0100
Subject: [RHEL7.7 PATCH 10/24] policy.c: prevent NULL pointer referencing
paths could be NULL, so accessing paths[0] must be preceded by a NULL
pointer check.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
policy.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/policy.c b/policy.c
index fa67d55..e3a0671 100644
--- a/policy.c
+++ b/policy.c
@@ -383,7 +383,7 @@ struct dev_policy *path_policy(char **paths, char *type)
/* Now add any metadata-specific internal knowledge
* about this path
*/
- for (i=0; paths[0] && superlist[i]; i++)
+ for (i=0; paths && paths[0] && superlist[i]; i++)
if (superlist[i]->get_disk_controller_domain) {
const char *d =
superlist[i]->get_disk_controller_domain(
--
2.7.5

View File

@ -0,0 +1,36 @@
From 757e55435997e355ee9b03e5d913b5496a3c39a8 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Tue, 11 Dec 2018 15:04:07 +0100
Subject: [RHEL7.7 PATCH 11/24] policy.c: Fix for compiler error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After cd72f9d(policy: support devices with multiple paths.) compilation
on old compilers fails because "p may be used uninitialized
in this function".
Initialize it with NULL to prevent this.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
policy.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/policy.c b/policy.c
index e3a0671..3c53bd3 100644
--- a/policy.c
+++ b/policy.c
@@ -268,7 +268,7 @@ static int pol_match(struct rule *rule, char **paths, char *type, char **part)
for (; rule; rule = rule->next) {
if (rule->name == rule_path) {
- char *p;
+ char *p = NULL;
int i;
if (pathok == 0)
pathok = -1;
--
2.7.5

View File

@ -0,0 +1,95 @@
From a4e96fd8f3f0b5416783237c1cb6ee87e7eff23d Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Fri, 8 Feb 2019 11:07:10 +0100
Subject: [RHEL7.7 PATCH 12/24] imsm: finish recovery when drive with rebuild
fails
Commit d7a1fda2769b ("imsm: update metadata correctly while raid10 double
degradation") resolves main Imsm double degradation problems but it
omits one case. Now metadata hangs in the rebuilding state if the drive
under rebuild is removed during recovery from double degradation.
The root cause of this problem is comparing new map_state with current
and if they both are degraded assuming that nothing new happens.
Don't rely on map states, just check if device is failed. If the drive
under rebuild fails then finish migration, in other cases update map
state only (second fail means that destination map state can't be normal).
To avoid problems with reassembling move end_migration (called after
double degradation successful recovery) after check if recovery really
finished, for details see (7ce057018 "imsm: fix: rebuild does not
continue after reboot").
Remove redundant code responsible for finishing rebuild process. Function
end_migration does exactly the same. Set last_checkpoint to 0, to prepare
it for the next rebuild.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index d2035cc..38a1b6c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,26 +8560,22 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (is_rebuilding(dev)) {
dprintf_cont("while rebuilding ");
- if (map->map_state != map_state) {
- dprintf_cont("map state change ");
+ if (state & DS_FAULTY) {
+ dprintf_cont("removing failed drive ");
if (n == map->failed_disk_num) {
dprintf_cont("end migration");
end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
} else {
- dprintf_cont("raid10 double degradation, map state change");
+ dprintf_cont("fail detected during rebuild, changing map state");
map->map_state = map_state;
}
super->updates_pending++;
- } else if (!rebuild_done)
- break;
- else if (n == map->failed_disk_num) {
- /* r10 double degraded to degraded transition */
- dprintf_cont("raid10 double degradation end migration");
- end_migration(dev, super, map_state);
- a->last_checkpoint = 0;
- super->updates_pending++;
}
+ if (!rebuild_done)
+ break;
+
/* check if recovery is really finished */
for (mdi = a->info.devs; mdi ; mdi = mdi->next)
if (mdi->recovery_start != MaxSector) {
@@ -8588,7 +8584,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
+ dprintf_cont("Rebuild has not finished yet");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;
@@ -8598,9 +8594,9 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
dprintf_cont(" Rebuild done, still degraded");
- dev->vol.migr_state = 0;
- set_migr_type(dev, 0);
- dev->vol.curr_migr_unit = 0;
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
for (i = 0; i < map->num_members; i++) {
int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
--
2.7.5

View File

@ -0,0 +1,322 @@
From 9f4218274cd4a1e1f356a1617f9a1d09960cf255 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Mon, 28 Jan 2019 17:10:41 +0100
Subject: [RHEL7.7 PATCH 13/24] imsm: fix reshape for >2TB drives
If reshape is performed on drives larger than 2 TB,
the calculated migration checkpoint area exceeds a 32-bit value.
This checkpoint area is a reserved space treated as backup
during reshape - at the end of the drive, right before metadata.
As a result - the wrong space is used and the data that may exist there
is overwritten.
Adding additional field to migration record to track high order 32-bits
of pba of this area. Three other fields that may exceed 32-bit value
for large drives are added as well.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 149 +++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 107 insertions(+), 42 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 38a1b6c..1cc7d5f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -296,7 +296,7 @@ struct migr_record {
__u32 rec_status; /* Status used to determine how to restart
* migration in case it aborts
* in some fashion */
- __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
+ __u32 curr_migr_unit_lo; /* 0..numMigrUnits-1 */
__u32 family_num; /* Family number of MPB
* containing the RaidDev
* that is migrating */
@@ -306,16 +306,23 @@ struct migr_record {
__u32 dest_depth_per_unit; /* Num member blocks each destMap
* member disk
* advances per unit-of-operation */
- __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
- __u32 dest_1st_member_lba; /* First member lba on first
- * stripe of destination */
- __u32 num_migr_units; /* Total num migration units-of-op */
+ __u32 ckpt_area_pba_lo; /* Pba of first block of ckpt copy area */
+ __u32 dest_1st_member_lba_lo; /* First member lba on first
+ * stripe of destination */
+ __u32 num_migr_units_lo; /* Total num migration units-of-op */
__u32 post_migr_vol_cap; /* Size of volume after
* migration completes */
__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
__u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
* migration ckpt record was read from
* (for recovered migrations) */
+ __u32 curr_migr_unit_hi; /* 0..numMigrUnits-1 high order 32 bits */
+ __u32 ckpt_area_pba_hi; /* Pba of first block of ckpt copy area
+ * high order 32 bits */
+ __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of
+ * destination - high order 32 bits */
+ __u32 num_migr_units_hi; /* Total num migration units-of-op
+ * high order 32 bits */
} __attribute__ ((__packed__));
struct md_list {
@@ -1208,6 +1215,38 @@ static unsigned long long imsm_dev_size(struct imsm_dev *dev)
return join_u32(dev->size_low, dev->size_high);
}
+static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->ckpt_area_pba_lo,
+ migr_rec->ckpt_area_pba_hi);
+}
+
+static unsigned long long current_migr_unit(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->curr_migr_unit_lo,
+ migr_rec->curr_migr_unit_hi);
+}
+
+static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->dest_1st_member_lba_lo,
+ migr_rec->dest_1st_member_lba_hi);
+}
+
+static unsigned long long get_num_migr_units(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->num_migr_units_lo,
+ migr_rec->num_migr_units_hi);
+}
+
static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
@@ -1233,6 +1272,33 @@ static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
split_ull(n, &dev->size_low, &dev->size_high);
}
+static void set_migr_chkp_area_pba(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi);
+}
+
+static void set_current_migr_unit(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->curr_migr_unit_lo,
+ &migr_rec->curr_migr_unit_hi);
+}
+
+static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->dest_1st_member_lba_lo,
+ &migr_rec->dest_1st_member_lba_hi);
+}
+
+static void set_num_migr_units(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->num_migr_units_lo,
+ &migr_rec->num_migr_units_hi);
+}
+
static unsigned long long per_dev_array_size(struct imsm_map *map)
{
unsigned long long array_size = 0;
@@ -1629,12 +1695,14 @@ void convert_to_4k_imsm_migr_rec(struct intel_super *super)
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit /= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba /= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba /= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV);
}
void convert_to_4k_imsm_disk(struct imsm_disk *disk)
@@ -1727,8 +1795,8 @@ void examine_migr_rec_imsm(struct intel_super *super)
printf("Normal\n");
else
printf("Contains Data\n");
- printf(" Current Unit : %u\n",
- __le32_to_cpu(migr_rec->curr_migr_unit));
+ printf(" Current Unit : %llu\n",
+ current_migr_unit(migr_rec));
printf(" Family : %u\n",
__le32_to_cpu(migr_rec->family_num));
printf(" Ascending : %u\n",
@@ -1737,16 +1805,15 @@ void examine_migr_rec_imsm(struct intel_super *super)
__le32_to_cpu(migr_rec->blocks_per_unit));
printf(" Dest. Depth Per Unit : %u\n",
__le32_to_cpu(migr_rec->dest_depth_per_unit));
- printf(" Checkpoint Area pba : %u\n",
- __le32_to_cpu(migr_rec->ckpt_area_pba));
- printf(" First member lba : %u\n",
- __le32_to_cpu(migr_rec->dest_1st_member_lba));
- printf(" Total Number of Units : %u\n",
- __le32_to_cpu(migr_rec->num_migr_units));
- printf(" Size of volume : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap));
- printf(" Expansion space for LBA64 : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
+ printf(" Checkpoint Area pba : %llu\n",
+ migr_chkp_area_pba(migr_rec));
+ printf(" First member lba : %llu\n",
+ migr_dest_1st_member_lba(migr_rec));
+ printf(" Total Number of Units : %llu\n",
+ get_num_migr_units(migr_rec));
+ printf(" Size of volume : %llu\n",
+ join_u32(migr_rec->post_migr_vol_cap,
+ migr_rec->post_migr_vol_cap_hi));
printf(" Record was read from : %u\n",
__le32_to_cpu(migr_rec->ckpt_read_disk_num));
@@ -1759,13 +1826,15 @@ void convert_from_4k_imsm_migr_rec(struct intel_super *super)
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit *= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba *= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba *= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap,
&migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV);
}
void convert_from_4k(struct intel_super *super)
@@ -3096,7 +3165,7 @@ static int imsm_create_metadata_checkpoint_update(
return 0;
}
(*u)->type = update_general_migration_checkpoint;
- (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
+ (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
dprintf("prepared for %u\n", (*u)->curr_migr_unit);
return update_memory_size;
@@ -3397,13 +3466,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
case MIGR_GEN_MIGR: {
__u64 blocks_per_unit = blocks_per_migr_unit(super,
dev);
- __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
+ __u64 units = current_migr_unit(migr_rec);
unsigned long long array_blocks;
int used_disks;
if (__le32_to_cpu(migr_rec->ascending_migr) &&
(units <
- (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
+ (get_num_migr_units(migr_rec)-1)) &&
(super->migr_rec->rec_status ==
__cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
units++;
@@ -10697,7 +10766,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
num_migr_units++;
- migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
+ set_num_migr_units(migr_rec, num_migr_units);
migr_rec->post_migr_vol_cap = dev->size_low;
migr_rec->post_migr_vol_cap_hi = dev->size_high;
@@ -10714,7 +10783,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
min_dev_sectors = dev_sectors;
close(fd);
}
- migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
+ set_migr_chkp_area_pba(migr_rec, min_dev_sectors -
RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
write_imsm_migr_rec(st);
@@ -10765,8 +10834,7 @@ int save_backup_imsm(struct supertype *st,
start = info->reshape_progress * 512;
for (i = 0; i < new_disks; i++) {
- target_offsets[i] = (unsigned long long)
- __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
+ target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512;
/* move back copy area adderss, it will be moved forward
* in restore_stripes() using start input variable
*/
@@ -10845,12 +10913,11 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
if (info->reshape_progress % blocks_per_unit)
curr_migr_unit++;
- super->migr_rec->curr_migr_unit =
- __cpu_to_le32(curr_migr_unit);
+ set_current_migr_unit(super->migr_rec, curr_migr_unit);
super->migr_rec->rec_status = __cpu_to_le32(state);
- super->migr_rec->dest_1st_member_lba =
- __cpu_to_le32(curr_migr_unit *
- __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
+ set_migr_dest_1st_member_lba(super->migr_rec,
+ super->migr_rec->dest_depth_per_unit * curr_migr_unit);
+
if (write_imsm_migr_rec(st) < 0) {
dprintf("imsm: Cannot write migration record outside backup area\n");
return 1;
@@ -10884,8 +10951,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
char *buf = NULL;
int retval = 1;
unsigned int sector_size = super->sector_size;
- unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
- unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
+ unsigned long curr_migr_unit = current_migr_unit(migr_rec);
+ unsigned long num_migr_units = get_num_migr_units(migr_rec);
char buffer[20];
int skipped_disks = 0;
@@ -10912,11 +10979,9 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
map_dest = get_imsm_map(id->dev, MAP_0);
new_disks = map_dest->num_members;
- read_offset = (unsigned long long)
- __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
+ read_offset = migr_chkp_area_pba(migr_rec) * 512;
- write_offset = ((unsigned long long)
- __le32_to_cpu(migr_rec->dest_1st_member_lba) +
+ write_offset = (migr_dest_1st_member_lba(migr_rec) +
pba_of_lba0(map_dest)) * 512;
unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
@@ -12019,12 +12084,12 @@ static int imsm_manage_reshape(
max_position = sra->component_size * ndata;
source_layout = imsm_level_to_layout(map_src->raid_level);
- while (__le32_to_cpu(migr_rec->curr_migr_unit) <
- __le32_to_cpu(migr_rec->num_migr_units)) {
+ while (current_migr_unit(migr_rec) <
+ get_num_migr_units(migr_rec)) {
/* current reshape position [blocks] */
unsigned long long current_position =
__le32_to_cpu(migr_rec->blocks_per_unit)
- * __le32_to_cpu(migr_rec->curr_migr_unit);
+ * current_migr_unit(migr_rec);
unsigned long long border;
/* Check that array hasn't become failed.
--
2.7.5

View File

@ -0,0 +1,101 @@
From ebf3be9931f31df54df52b1821479e6a80a4d9c6 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <xnox@ubuntu.com>
Date: Tue, 15 Jan 2019 19:08:37 +0000
Subject: [RHEL7.7 PATCH 14/24] Fix spelling typos.
Signed-off-by: Dimitri John Ledkov <xnox@ubuntu.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 2 +-
Create.c | 2 +-
Grow.c | 6 +++---
super-ddf.c | 2 +-
super-intel.c | 2 +-
5 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9f75c68..9f050c1 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -879,7 +879,7 @@ static int force_array(struct mdinfo *content,
current_events = devices[chosen_drive].i.events;
add_another:
if (c->verbose >= 0)
- pr_err("forcing event count in %s(%d) from %d upto %d\n",
+ pr_err("forcing event count in %s(%d) from %d up to %d\n",
devices[chosen_drive].devname,
devices[chosen_drive].i.disk.raid_disk,
(int)(devices[chosen_drive].i.events),
diff --git a/Create.c b/Create.c
index 04b1dfc..6f1b228 100644
--- a/Create.c
+++ b/Create.c
@@ -823,7 +823,7 @@ int Create(struct supertype *st, char *mddev,
}
bitmap_fd = open(s->bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
- pr_err("weird: %s cannot be openned\n",
+ pr_err("weird: %s cannot be opened\n",
s->bitmap_file);
goto abort_locked;
}
diff --git a/Grow.c b/Grow.c
index 363b209..6d32661 100644
--- a/Grow.c
+++ b/Grow.c
@@ -446,7 +446,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
if (offset_setable) {
st->ss->getinfo_super(st, mdi, NULL);
if (sysfs_init(mdi, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
free(mdi);
}
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
@@ -2178,7 +2178,7 @@ size_change_error:
memset(&info, 0, sizeof(info));
info.array = array;
if (sysfs_init(&info, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
rv = 1;
goto release;
}
@@ -2903,7 +2903,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
struct mdinfo info;
if (sysfs_init(&info, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
return 1;
}
diff --git a/super-ddf.c b/super-ddf.c
index 618542c..c095e8a 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1900,7 +1900,7 @@ static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
return conf;
}
bad:
- pr_err("Could't find disk %d in array %u\n", n, inst);
+ pr_err("Couldn't find disk %d in array %u\n", n, inst);
return NULL;
}
diff --git a/super-intel.c b/super-intel.c
index 1cc7d5f..c399433 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -10034,7 +10034,7 @@ static void imsm_process_update(struct supertype *st,
break;
}
default:
- pr_err("error: unsuported process update type:(type: %d)\n", type);
+ pr_err("error: unsupported process update type:(type: %d)\n", type);
}
}
--
2.7.5

View File

@ -0,0 +1,46 @@
From e3615ecb5b6ad8eb408296878aad5628e0e27166 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 12 Feb 2019 12:53:18 +0800
Subject: [RHEL7.7 PATCH 15/24] Detail.c: do not skip first character when
calling xstrdup in Detail()
'Commit b9c9bd9bacaa ("Detail: ensure --export names are acceptable as
shell variables")' duplicates mdi->sys_name to sysdev string by,
char *sysdev = xstrdup(mdi->sys_name + 1);
which skips the first character of mdi->sys_name. Then when running
mdadm --detail <md device> --export, the output looks like,
MD_DEVICE_ev_sda2_ROLE=1
MD_DEVICE_ev_sda2_DEV=/dev/sda2
The first character of md device (between MD_DEVICE and _ROLE/_DEV)
is dropped. The expected output should be,
MD_DEVICE_dev_sda2_ROLE=1
MD_DEVICE_dev_sda2_DEV=/dev/sda2
This patch removes the '+ 1' from calling xstrdup() in Detail(), which
gets the dropped first character back.
Reported-by: Arvin Schnell <aschnell@suse.com>
Fixes: b9c9bd9bacaa ("Detail: ensure --export names are acceptable as shell variables")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Detail.c b/Detail.c
index b3e857a..20ea03a 100644
--- a/Detail.c
+++ b/Detail.c
@@ -284,7 +284,7 @@ int Detail(char *dev, struct context *c)
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
char *path;
- char *sysdev = xstrdup(mdi->sys_name + 1);
+ char *sysdev = xstrdup(mdi->sys_name);
char *cp;
path = map_dev(mdi->disk.major,
--
2.7.5

View File

@ -0,0 +1,70 @@
From cab114c5ca870e5f1b57fb2602cd9a038271c2e0 Mon Sep 17 00:00:00 2001
From: Corey Hickey <bugfood-c@fatooh.org>
Date: Mon, 11 Feb 2019 17:18:38 -0800
Subject: [RHEL7.7 PATCH 16/24] Fix reshape for decreasing data offset
...when not changing the number of disks.
This patch needs context to explain. These are the relevant parts of
the original code (condensed and annotated):
if (dir > 0) {
/* Increase data offset (reshape backwards) */
if (data_offset < sd->data_offset + min) {
pr_err("--data-offset too small on %s\n",
dn);
goto release;
}
} else {
/* Decrease data offset (reshape forwards) */
if (data_offset < sd->data_offset - min) {
pr_err("--data-offset too small on %s\n",
dn);
goto release;
}
}
When this code is reached, mdadm has already decided on a reshape
direction. When increasing the data offset, the reshape runs backwards
(dir==1); when decreasing the data offset, the reshape runs forwards
(dir==-1).
The conditional within the backwards reshape is correct: the requested
offset must be larger than the old offset plus a minimum delta; thus the
reshape has room to work.
For the forwards reshape, the requested offset needs to be smaller than
the old offset minus a minimum delta; to do this correctly, the
comparison must be reversed.
Also update the error message.
Note: I have tested this change on a RAID 5 on Linux 4.18.0 and verified
that there were no errors from the kernel and that the device data
remained intact. I do not know if there are considerations for different
RAID levels.
Signed-off-by: Corey Hickey <bugfood-c@fatooh.org>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Grow.c b/Grow.c
index 6d32661..764374f 100644
--- a/Grow.c
+++ b/Grow.c
@@ -2613,8 +2613,8 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
goto release;
}
if (data_offset != INVALID_SECTORS &&
- data_offset < sd->data_offset - min) {
- pr_err("--data-offset too small on %s\n",
+ data_offset > sd->data_offset - min) {
+ pr_err("--data-offset too large on %s\n",
dn);
goto release;
}
--
2.7.5

View File

@ -0,0 +1,100 @@
From 76b906d2406cdf136f64de77e881eb2d180108d9 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Fri, 7 Dec 2018 14:30:09 +0100
Subject: [RHEL7.7 PATCH 17/24] mdadm/tests: add one test case for failfast of
raid1
This creates a raid1 device with the failfast option and checks that all
slaves have the failfast flag. It then assembles and grows
the raid1 device and checks that failfast still works.
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
tests/05r1-failfast | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 tests/05r1-failfast
diff --git a/tests/05r1-failfast b/tests/05r1-failfast
new file mode 100644
index 0000000..823dd6f
--- /dev/null
+++ b/tests/05r1-failfast
@@ -0,0 +1,74 @@
+
+# create a simple mirror and check failfast flag works
+mdadm -CR $md0 -e1.2 --level=raid1 --failfast -n2 $dev0 $dev1
+check raid1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Removing works with the failfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+if grep -v failfast /sys/block/md0/md/rd1/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Adding works with the failfast flag
+mdadm $md0 -a --failfast $dev0
+check wait
+if grep -v failfast /sys/block/md0/md/rd0/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+mdadm -S $md0
+
+# Assembling works with the failfast flag
+mdadm -A $md0 $dev0 $dev1
+check raid1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Adding works with the nofailfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+mdadm $md0 -a --nofailfast $dev0
+check wait
+if grep failfast /sys/block/md0/md/rd0/state > /dev/null
+then
+ die "failfast should be missing"
+fi
+
+# Assembling with one faulty slave works with the failfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+mdadm -S $md0
+mdadm -A $md0 $dev0 $dev1
+check raid1
+mdadm -S $md0
+
+# Spare works with the failfast flag
+mdadm -CR $md0 -e1.2 --level=raid1 --failfast -n2 $dev0 $dev1
+check raid1
+mdadm $md0 -a --failfast $dev2
+check wait
+check spares 1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Grow works with the failfast flag
+mdadm -G $md0 --raid-devices=3
+check wait
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+mdadm -S $md0
+
+exit 0
--
2.7.5

View File

@ -0,0 +1,50 @@
From 69d084784de196acec8ab703cd1b379af211d624 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Fri, 22 Feb 2019 10:15:45 +0100
Subject: [RHEL7.7 PATCH 18/24] mdmon: don't attempt to manage new arrays when
terminating
When mdmon gets a SIGTERM, it stops managing arrays that are clean. If
there is more than one array in the container and one of them is dirty
and the clean one is still present in mdstat, mdmon will treat it as a
new array and start managing it again. This leads to a cycle of
remove_old() / manage_new() calls for the clean array, until the other
one also becomes clean.
Prevent this by not calling manage_new() if sigterm is set. Also, remove
a check for sigterm in manage_new() because the condition will never be
true.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
managemon.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/managemon.c b/managemon.c
index 101231c..29b91ba 100644
--- a/managemon.c
+++ b/managemon.c
@@ -727,9 +727,7 @@ static void manage_new(struct mdstat_ent *mdstat,
dprintf("inst: %s action: %d state: %d\n", inst,
new->action_fd, new->info.state_fd);
- if (sigterm)
- new->info.safe_mode_delay = 1;
- else if (mdi->safe_mode_delay >= 50)
+ if (mdi->safe_mode_delay >= 50)
/* Normal start, mdadm set this. */
new->info.safe_mode_delay = mdi->safe_mode_delay;
else
@@ -803,7 +801,7 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container)
break;
}
}
- if (a == NULL || !a->container)
+ if ((a == NULL || !a->container) && !sigterm)
manage_new(mdstat, container, a);
}
}
--
2.7.5

View File

@ -0,0 +1,58 @@
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 22 Feb 2019 13:30:27 +0100
Subject: [RHEL7.7 PATCH 19/24] mdmon: wait for previous mdmon to exit during
takeover
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
early"), safe_mode_delay is set properly by initrd mdmon. But in some
cases with filesystem traffic since the very start of the system, it
might take a while to transition to clean state. Due to the fact that the new
mdmon does not wait for the old one to exit - it might happen that the
new one switches safe_mode_delay back to seconds before the old one exits.
As a result, two mdmons are running concurrently on the same array.
Wait for the old mdmon to exit by pinging it with SIGUSR1 signal, just
in case it is sleeping.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdmon.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/mdmon.c b/mdmon.c
index 0955fcc..ff985d2 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
int fd;
int n;
long fl;
+ int rv;
/* first rule of survival... don't off yourself */
if (pid == getpid())
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
fl &= ~O_NONBLOCK;
fcntl(sock, F_SETFL, fl);
n = read(sock, buf, 100);
- /* Ignore result, it is just the wait that
- * matters
- */
+
+ /* If there is I/O going on it might took some time to get to
+ * clean state. Wait for monitor to exit fully to avoid races.
+ * Ping it with SIGUSR1 in case that it is sleeping */
+ for (n = 0; n < 25; n++) {
+ rv = kill(pid, SIGUSR1);
+ if (rv < 0)
+ break;
+ usleep(200000);
+ }
}
void remove_pidfile(char *devname)
--
2.7.5

View File

@ -0,0 +1,52 @@
From 2b57e4fe041d52ae29866c93a878a11c07223cff Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 22 Feb 2019 12:56:27 +0100
Subject: [RHEL7.7 PATCH 20/24] Assemble: Fix starting array with initial
reshape checkpoint
If array was stopped during reshape initialization,
there might be a "0" checkpoint recorded in metadata.
If array with such condition (reshape with position 0)
is passed to kernel - it will refuse to start such array.
Treat such array as normal during assemble, Grow_continue() will
reinitialize and start the reshape.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9f050c1..420c7b3 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -2061,8 +2061,22 @@ int assemble_container_content(struct supertype *st, int mdfd,
spare, &c->backup_file, c->verbose) == 1)
return 1;
- err = sysfs_set_str(content, NULL,
- "array_state", "readonly");
+ if (content->reshape_progress == 0) {
+ /* If reshape progress is 0 - we are assembling the
+ * array that was stopped, before reshape has started.
+ * Array needs to be started as active, Grow_continue()
+ * will start the reshape.
+ */
+ sysfs_set_num(content, NULL, "reshape_position",
+ MaxSector);
+ err = sysfs_set_str(content, NULL,
+ "array_state", "active");
+ sysfs_set_num(content, NULL, "reshape_position", 0);
+ } else {
+ err = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ }
+
if (err)
return 1;
--
2.7.5

View File

@ -0,0 +1,59 @@
From 227aeaa872d4898273cf87a4253898823d556c43 Mon Sep 17 00:00:00 2001
From: Corey Hickey <bugfood-c@fatooh.org>
Date: Mon, 11 Feb 2019 17:42:27 -0800
Subject: [RHEL7.7 PATCH 21/24] add missing units to --examine
Within the output of "mdadm --examine", there are three sizes reported
on adjacent lines. For example:
$ sudo mdadm --examine /dev/md3
[...]
Avail Dev Size : 17580545024 (8383.06 GiB 9001.24 GB)
Array Size : 17580417024 (16765.99 GiB 18002.35 GB)
Used Dev Size : 11720278016 (5588.66 GiB 6000.78 GB)
[...]
This can be confusing, since the first and third line are in 512-byte
sectors, and the second is in KiB.
Add units to avoid ambiguity.
(I don't particularly like the "KiB" notation, but it is at least
unambiguous.)
Signed-off-by: Corey Hickey <bugfood-c@fatooh.org>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super1.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super1.c b/super1.c
index 636a286..b85dc20 100644
--- a/super1.c
+++ b/super1.c
@@ -360,7 +360,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Raid Level : %s\n", c?c:"-unknown-");
printf(" Raid Devices : %d\n", __le32_to_cpu(sb->raid_disks));
printf("\n");
- printf(" Avail Dev Size : %llu%s\n",
+ printf(" Avail Dev Size : %llu sectors%s\n",
(unsigned long long)__le64_to_cpu(sb->data_size),
human_size(__le64_to_cpu(sb->data_size)<<9));
if (__le32_to_cpu(sb->level) > 0) {
@@ -378,11 +378,11 @@ static void examine_super1(struct supertype *st, char *homehost)
if (ddsks) {
long long asize = __le64_to_cpu(sb->size);
asize = (asize << 9) * ddsks / ddsks_denom;
- printf(" Array Size : %llu%s\n",
+ printf(" Array Size : %llu KiB%s\n",
asize >> 10, human_size(asize));
}
if (sb->size != sb->data_size)
- printf(" Used Dev Size : %llu%s\n",
+ printf(" Used Dev Size : %llu sectors%s\n",
(unsigned long long)__le64_to_cpu(sb->size),
human_size(__le64_to_cpu(sb->size)<<9));
}
--
2.7.5

View File

@ -0,0 +1,117 @@
From 05501181f18cdccdb0b3cec1d8cf59f0995504d7 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 8 Mar 2019 12:19:11 +0100
Subject: [RHEL7.7 PATCH 22/24] imsm: fix spare activation for old matrix
arrays
During spare activation get_extents() calculates metadata reserved space based
on smallest active RAID member or it will take the defaults. Since patch
611d9529("imsm: change reserved space to 4MB") default is extended. If array
was created prior to that patch, reserved space is smaller. In case of matrix
RAID - spare is activated in each array one-by-one, so it is spare for first
activation, but treated as "active" during second one.
In case of adding spare drive to old matrix RAID with the size the same as
already existing member drive the routine will take the defaults during second
run and mdmon will refuse to rebuild second volume, claiming that the drive
does not have enough free space.
Add parameter to get_extents(), so that during spare activation reserved space
is always based on smallest active drive - even if given drive is already
active in some other array of matrix RAID.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index c399433..5a7c9f8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1313,7 +1313,8 @@ static unsigned long long per_dev_array_size(struct imsm_map *map)
return array_size;
}
-static struct extent *get_extents(struct intel_super *super, struct dl *dl)
+static struct extent *get_extents(struct intel_super *super, struct dl *dl,
+ int get_minimal_reservation)
{
/* find a list of used extents on the given physical device */
struct extent *rv, *e;
@@ -1325,7 +1326,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl)
* regardless of whether the OROM has assigned sectors from the
* IMSM_RESERVED_SECTORS region
*/
- if (dl->index == -1)
+ if (dl->index == -1 || get_minimal_reservation)
reservation = imsm_min_reserved_sectors(super);
else
reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -1386,7 +1387,7 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
if (dl->index == -1)
return MPB_SECTOR_CNT;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -1478,7 +1479,7 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
return rv;
/* find last lba used by subarrays on the smallest active disk */
- e = get_extents(super, dl_min);
+ e = get_extents(super, dl_min, 0);
if (!e)
return rv;
for (i = 0; e[i].size; i++)
@@ -1519,7 +1520,7 @@ int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
if (!dl)
return -EINVAL;
/* find last lba used by subarrays */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return -EINVAL;
for (i = 0; e[i].size; i++)
@@ -7203,7 +7204,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
pos = 0;
i = 0;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e) continue;
do {
unsigned long long esize;
@@ -7261,7 +7262,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
}
/* retrieve the largest free space block */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
maxsize = 0;
i = 0;
if (e) {
@@ -7359,7 +7360,7 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
if (super->orom && dl->index < 0 && mpb->num_raid_devs)
continue;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
continue;
for (i = 1; e[i-1].size; i++)
@@ -8846,7 +8847,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
/* Does this unused device have the requisite free space?
* It needs to be able to cover all member volumes
*/
- ex = get_extents(super, dl);
+ ex = get_extents(super, dl, 1);
if (!ex) {
dprintf("cannot get extents\n");
continue;
--
2.7.5

View File

@ -0,0 +1,94 @@
From 22dc741f63e6403d59c2c14f56fd4791265f9bbb Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Mon, 1 Apr 2019 16:53:41 +0200
Subject: [RHEL7.7 PATCH 23/24] Create: Block rounding size to max
When passed size is smaller than chunk, mdadm rounds it to 0 but 0 there
means max available space.
Block it for every metadata. Remove the same check from imsm routine.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 23 ++++++++++++++++++++---
super-intel.c | 5 ++---
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/Create.c b/Create.c
index 6f1b228..292f92a 100644
--- a/Create.c
+++ b/Create.c
@@ -27,6 +27,18 @@
#include "md_p.h"
#include <ctype.h>
+static int round_size_and_verify(unsigned long long *size, int chunk)
+{
+ if (*size == 0)
+ return 0;
+ *size &= ~(unsigned long long)(chunk - 1);
+ if (*size == 0) {
+ pr_err("Size cannot be smaller than chunk.\n");
+ return 1;
+ }
+ return 0;
+}
+
static int default_layout(struct supertype *st, int level, int verbose)
{
int layout = UnSet;
@@ -248,11 +260,14 @@ int Create(struct supertype *st, char *mddev,
pr_err("unknown level %d\n", s->level);
return 1;
}
+
if (s->size == MAX_SIZE)
/* use '0' to mean 'max' now... */
s->size = 0;
if (s->size && s->chunk && s->chunk != UnSet)
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
+
newsize = s->size * 2;
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
@@ -267,7 +282,8 @@ int Create(struct supertype *st, char *mddev,
/* default chunk was just set */
if (c->verbose > 0)
pr_err("chunk size defaults to %dK\n", s->chunk);
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
do_default_chunk = 0;
}
}
@@ -413,7 +429,8 @@ int Create(struct supertype *st, char *mddev,
/* default chunk was just set */
if (c->verbose > 0)
pr_err("chunk size defaults to %dK\n", s->chunk);
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
do_default_chunk = 0;
}
}
diff --git a/super-intel.c b/super-intel.c
index 5a7c9f8..2ba045a 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7455,9 +7455,8 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
verbose);
}
- if (size && ((size < 1024) || (*chunk != UnSet &&
- size < (unsigned long long) *chunk))) {
- pr_err("Given size must be greater than 1M and chunk size.\n");
+ if (size && (size < 1024)) {
+ pr_err("Given size must be greater than 1M.\n");
/* Depends on algorithm in Create.c :
* if container was given (dev == NULL) return -1,
* if block device was given ( dev != NULL) return 0.
--
2.7.5

View File

@ -0,0 +1,31 @@
From 3c9b46cf9ae15a9be98fc47e2080bd9494496246 Mon Sep 17 00:00:00 2001
From: Liwei Song <liwei.song@windriver.com>
Date: Tue, 19 Mar 2019 23:51:05 -0400
Subject: [RHEL7.7 PATCH 24/24] udev: Add udev rules to create by-partuuid for
md device
This rules will create link under /dev/disk/by-partuuid/ for
MD devices partition, with which will support specify
root=PARTUUID=XXX to boot rootfs.
Signed-off-by: Liwei Song <liwei.song@windriver.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 1 +
1 file changed, 1 insertion(+)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index c95ec7b..5b99d58 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -30,6 +30,7 @@ IMPORT{builtin}="blkid"
OPTIONS+="link_priority=100"
OPTIONS+="watch"
ENV{ID_FS_USAGE}=="filesystem|other|crypto", ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}"
+ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-partuuid/$env{ID_PART_ENTRY_UUID}"
ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}"
ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
--
2.7.5

View File

@ -0,0 +1,23 @@
--- mdadm-3.2.1/Makefile.static 2011-03-27 22:31:20.000000000 -0400
+++ mdadm-3.2.1/Makefile 2011-03-28 10:16:55.277900184 -0400
@@ -238,16 +238,16 @@ install : mdadm mdmon install-man instal
$(INSTALL) -D $(STRIP) -m 755 mdmon $(DESTDIR)$(BINDIR)/mdmon
install-static : mdadm.static install-man
- $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.static $(DESTDIR)$(BINDIR)/mdadm.static
install-tcc : mdadm.tcc install-man
- $(INSTALL) -D $(STRIP) -m 755 mdadm.tcc $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.tcc $(DESTDIR)$(BINDIR)/mdadm.tcc
install-uclibc : mdadm.uclibc install-man
- $(INSTALL) -D $(STRIP) -m 755 mdadm.uclibc $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.uclibc $(DESTDIR)$(BINDIR)/mdadm.uclibc
install-klibc : mdadm.klibc install-man
- $(INSTALL) -D $(STRIP) -m 755 mdadm.klibc $(DESTDIR)$(BINDIR)/mdadm
+ $(INSTALL) -D $(STRIP) -m 755 mdadm.klibc $(DESTDIR)$(BINDIR)/mdadm.klibc
install-man: mdadm.8 md.4 mdadm.conf.5 mdmon.8
$(INSTALL) -D -m 644 mdadm.8 $(DESTDIR)$(MAN8DIR)/mdadm.8

View File

@ -0,0 +1,13 @@
--- mdadm-4.1_rc1/udev-md-raid-assembly.rules~ 2018-06-22 13:10:58.196250086 +0800
+++ mdadm-4.1_rc1/udev-md-raid-assembly.rules 2018-06-22 13:11:37.761241080 +0800
@@ -5,6 +5,10 @@
ENV{ANACONDA}=="?*", GOTO="md_inc_end"
# assemble md arrays
+# In Fedora we handle the raid components in 65-md-incremental.rules so that
+# we can do things like honor anaconda command line options and such
+GOTO="md_inc_end"
+
SUBSYSTEM!="block", GOTO="md_inc_end"
# skip non-initialized devices

3
SOURCES/mdadm-cron Normal file
View File

@ -0,0 +1,3 @@
# Run system wide raid-check once a week on Sunday at 1am by default
0 1 * * Sun root /usr/sbin/raid-check

View File

@ -0,0 +1,60 @@
#!/bin/bash
#
# Configuration file for /usr/sbin/raid-check
#
# options:
# ENABLED - must be yes in order for the raid check to proceed
# CHECK - can be either check or repair depending on the type of
# operation the user desires. A check operation will scan
# the drives looking for bad sectors and automatically
# repairing only bad sectors. If it finds good sectors that
# contain bad data (meaning that the data in a sector does
# not agree with what the data from another disk indicates
# the data should be, for example the parity block + the other
# data blocks would cause us to think that this data block
# is incorrect), then it does nothing but increments the
# counter in the file /sys/block/$dev/md/mismatch_cnt.
# This allows the sysadmin to inspect the data in the sector
# and the data that would be produced by rebuilding the
# sector from redundant information and pick the correct
# data to keep. The repair option does the same thing, but
# when it encounters a mismatch in the data, it automatically
# updates the data to be consistent. However, since we really
# don't know whether it's the parity or the data block that's
# correct (or which data block in the case of raid1), it's
# luck of the draw whether or not the user gets the right
# data instead of the bad data. This option is the default
# option for devices not listed in either CHECK_DEVS or
# REPAIR_DEVS.
# CHECK_DEVS - a space delimited list of devs that the user specifically
# wants to run a check operation on.
# REPAIR_DEVS - a space delimited list of devs that the user
# specifically wants to run a repair on.
# SKIP_DEVS - a space delimited list of devs that should be skipped
# NICE - Change the raid check CPU and IO priority in order to make
# the system more responsive during lengthy checks. Valid
# values are high, normal, low, idle.
# MAXCONCURRENT - Limit the number of devices to be checked at a time.
# By default all devices will be checked at the same time.
#
# Note: the raid-check script intentionally runs last in the cron.weekly
# sequence. This is so we can wait for all the resync operations to complete
# and then check the mismatch_cnt on each array without unduly delaying
# other weekly cron jobs. If any arrays have a non-0 mismatch_cnt after
# the check completes, we echo a warning to stdout which will then be emailed
# to the admin as long as mails from cron jobs have not been redirected to
# /dev/null. We do not wait for repair operations to complete as the
# md stack will correct any mismatch_cnts automatically.
#
# Note2: you cannot use symbolic names for the raid devices, such as
# /dev/md/root. The names used in this file must match the names seen in
# /proc/mdstat and in /sys/block.
ENABLED=yes
CHECK=check
NICE=low
# To check devs /dev/md0 and /dev/md3, use "md0 md3"
CHECK_DEVS=""
REPAIR_DEVS=""
SKIP_DEVS=""
MAXCONCURRENT=

1
SOURCES/mdadm.conf Normal file
View File

@ -0,0 +1 @@
# Runtime directory that holds mdadm.pid for the monitor service.
# Use /run directly; /var/run is only a legacy alias for it and
# systemd-tmpfiles complains about paths below it.
d /run/mdadm 0710 root root -

67
SOURCES/mdadm.rules Normal file
View File

@ -0,0 +1,67 @@
# This file causes block devices with Linux RAID (mdadm) signatures to
# automatically cause mdadm to be run.
# See udev(8) for syntax
# Don't process any events if anaconda is running as anaconda brings up
# raid devices manually
ENV{ANACONDA}=="?*", GOTO="md_end"
# Also don't process disks that are slated to be a multipath device
ENV{DM_MULTIPATH_DEVICE_PATH}=="1", GOTO="md_end"
# We process add events on block devices (since they are ready as soon as
# they are added to the system), but we must process change events as well
# on any dm devices (like LUKS partitions or LVM logical volumes) and on
# md devices because both of these first get added, then get brought live
# and trigger a change event. The reason we don't process change events
# on bare hard disks is because if you stop all arrays on a disk, then
# run fdisk on the disk to change the partitions, when fdisk exits it
# triggers a change event, and we want to wait until all the fdisks on
# all member disks are done before we do anything. Unfortunately, we have
# no way of knowing that, so we just have to let those arrays be brought
# up manually after fdisk has been run on all of the disks.
# First, process all add events (md and dm devices will not really do
# anything here, just regular disks, and this also won't get any imsm
# array members either)
SUBSYSTEM=="block", ACTION=="add", ENV{ID_FS_TYPE}=="linux_raid_member", \
IMPORT{program}="/sbin/mdadm -I $env{DEVNAME} --export $devnode --offroot ${DEVLINKS}"
SUBSYSTEM=="block", ACTION=="add", ENV{ID_FS_TYPE}=="linux_raid_member", \
ENV{MD_STARTED}=="*unsafe*", ENV{MD_FOREIGN}=="no", ENV{SYSTEMD_WANTS}+="mdadm-last-resort@$env{MD_DEVICE}.timer"
SUBSYSTEM=="block", ACTION=="remove", ENV{ID_PATH}=="?*", \
ENV{ID_FS_TYPE}=="linux_raid_member", \
RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
SUBSYSTEM=="block", ACTION=="remove", ENV{ID_PATH}!="?*", \
ENV{ID_FS_TYPE}=="linux_raid_member", \
RUN+="/sbin/mdadm -If $name"
# Next, check to make sure the BIOS raid stuff wasn't turned off via cmdline
IMPORT{cmdline}="noiswmd"
IMPORT{cmdline}="nodmraid"
ENV{noiswmd}=="?*", GOTO="md_imsm_inc_end"
ENV{nodmraid}=="?*", GOTO="md_imsm_inc_end"
SUBSYSTEM=="block", ACTION=="add", ENV{ID_FS_TYPE}=="isw_raid_member", \
RUN+="/sbin/mdadm -I $env{DEVNAME}"
SUBSYSTEM=="block", ACTION=="remove", ENV{ID_PATH}=="?*", \
ENV{ID_FS_TYPE}=="isw_raid_member", \
RUN+="/sbin/mdadm -If $name --path $env{ID_PATH}"
SUBSYSTEM=="block", ACTION=="remove", ENV{ID_PATH}!="?*", \
ENV{ID_FS_TYPE}=="isw_raid_member", \
RUN+="/sbin/mdadm -If $name"
LABEL="md_imsm_inc_end"
# Next make sure that this isn't a dm device we should skip for some reason
ENV{DM_UDEV_RULES_VSN}!="?*", GOTO="dm_change_end"
ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}=="1", GOTO="dm_change_end"
ENV{DM_SUSPENDED}=="1", GOTO="dm_change_end"
KERNEL=="dm-*", SUBSYSTEM=="block", ENV{ID_FS_TYPE}=="linux_raid_member", \
ACTION=="change", RUN+="/sbin/mdadm -I $env{DEVNAME}"
LABEL="dm_change_end"
# Finally catch any nested md raid arrays. If we brought up an md raid
# array that's part of another md raid array, it won't be ready to be used
# until the change event that occurs when it becomes live
KERNEL=="md*", SUBSYSTEM=="block", ENV{ID_FS_TYPE}=="linux_raid_member", \
ACTION=="change", RUN+="/sbin/mdadm -I $env{DEVNAME}"
LABEL="md_end"

5
SOURCES/mdadm_event.conf Normal file
View File

@ -0,0 +1,5 @@
# Save /proc/mdstat in case of crash in mdadm/mdmon
EVENT=post-create component=mdadm
cat /proc/mdstat >> mdstat_data
echo "Saved output of /proc/mdstat"

118
SOURCES/mdmonitor.init Executable file
View File

@ -0,0 +1,118 @@
#!/bin/bash
#
# mdmonitor This starts, stops, and reloads the mdadm-based
# software RAID monitoring and management facility
#
# chkconfig: 2345 15 85
# description: software RAID monitoring and management
# config: /etc/mdadm.conf
#
# Copyright 2002 Red Hat, Inc.
#
### BEGIN INIT INFO
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Start and stop the MD software RAID monitor
# Description: The mdmonitor service checks the status of all software
# RAID arrays on the system. In the event that any of the arrays
# transition into a degraded state, it notifies the system
# administrator. Other options are available, see the mdadm.conf
# and mdadm man pages for possible ways to configure this service.
### END INIT INFO
# Directory and file in which the monitor records its PID
PIDPATH=/var/run/mdadm
PIDFILE=/var/run/mdadm/mdadm.pid
# Search the sbin directories first
PATH=/sbin:/usr/sbin:$PATH
# Exit status of this script; set by the action dispatch at the bottom
RETVAL=0
# Arguments handed to mdadm when the monitor is started
OPTIONS="--monitor --scan -f --pid-file=$PIDFILE"
# Service name used in messages and for the /var/lock/subsys lock file
prog=mdmonitor
# Source function library.
. /etc/rc.d/init.d/functions
# Print the list of accepted actions and record a failure status in RETVAL
usage ()
{
    printf '%s\n' "Usage: service $prog {start|stop|status|restart|try-restart|force-reload}"
    RETVAL=1
}
# Start the RAID monitor.
# Returns 6 when /etc/mdadm.conf is missing or has no MAILADDR/PROGRAM line,
# 1 when the PID directory cannot be created, 0 when checkpid succeeds on the
# PID already recorded in $PIDFILE, and otherwise the status of the daemon call.
start ()
{
    # (Re)start mdmon to take over monitoring of mdmon started from the initrd.
    # The glob only detects whether any readable pid file exists; the takeover
    # is then issued once with --all.
    for i in /dev/md/*.pid; do
        if [ -r "$i" ]; then
            origprog="$prog"; prog="mdmon"
            action $"Starting $prog: " /sbin/mdmon --takeover --all
            prog="$origprog"
            break
        fi
    done
    # Make sure configuration file exists and has information we can use
    # MAILADDR or PROGRAM or both must be set in order to run mdadm --monitor
    [ -f /etc/mdadm.conf ] || return 6
    grep '^\(MAILADDR\|PROGRAM\) .' /etc/mdadm.conf >/dev/null 2>&1 || return 6
    # Create our directory if it isn't there yet
    if [ ! -d "$PIDPATH" ]; then
        mkdir -m 0700 "$PIDPATH" >&/dev/null
        # Save mkdir's status before restorecon overwrites $?
        RC=$?
        [ -x /sbin/restorecon ] && /sbin/restorecon "$PIDPATH"
        if [ "$RC" -ne 0 ]; then
            echo -n "Failed to create /var/run/mdadm"
            failure
            echo
            return 1
        fi
    fi
    # checkpid comes from the sourced function library; presumably it succeeds
    # when the recorded PID is still alive, in which case there is nothing to do
    if [ -f "$PIDFILE" ]; then
        checkpid $(cat "$PIDFILE") && return 0
    fi
    echo -n $"Starting $prog: "
    cd /
    daemon --user=root mdadm ${OPTIONS}
    ret=$?
    # Only create the lock file if the monitor actually started
    [ "$ret" -eq 0 ] && touch "/var/lock/subsys/$prog"
    echo
    return $ret
}
# Stop the RAID monitor.  Does nothing (and succeeds) when the subsys lock
# file is absent; otherwise kills mdadm and removes the PID and lock files.
stop ()
{
    if [ ! -f /var/lock/subsys/$prog ]; then
        return 0
    fi
    echo -n "Killing $prog: "
    killproc mdadm
    echo
    # Clean up the state left behind by start
    rm -f $PIDFILE
    rm -f /var/lock/subsys/$prog
}
# Stop the monitor and start it again; the result is the status of start
restart ()
{
    stop; start
}
# Restart the monitor only if it is currently marked as running (the subsys
# lock file exists).  Returns 0 when there is nothing to restart, otherwise
# the status of restart.
condrestart ()
{
    # An explicit if/else is used instead of "test && restart || return 0":
    # that form also returned 0 when restart itself failed, hiding the error.
    if [ -e /var/lock/subsys/$prog ]; then
        restart
    else
        return 0
    fi
}
# Actions that change the service state may only be run by root
# (status 4 is presumably the LSB "insufficient privilege" code)
case "$1" in
    start|stop|restart|condrestart|try-restart|force-reload)
        [ $(id -u) != "0" ] && exit 4
        ;;
esac
# Run the requested action and keep its status for the final exit
case "$1" in
    start)
        start
        RETVAL=$?
        ;;
    stop)
        stop
        RETVAL=$?
        ;;
    status)
        status -p $PIDFILE $prog
        RETVAL=$?
        ;;
    restart)
        restart
        RETVAL=$?
        ;;
    reload)
        # reload is not implemented by this script
        RETVAL=3
        ;;
    condrestart|try-restart|force-reload)
        condrestart
        RETVAL=$?
        ;;
    *)
        usage
        RETVAL=2
        ;;
esac
exit $RETVAL

12
SOURCES/mdmonitor.service Normal file
View File

@ -0,0 +1,12 @@
[Unit]
Description=Software RAID monitoring and management
ConditionPathExists=/etc/mdadm.conf

[Service]
Type=forking
# Use /run directly: /var/run is a legacy alias for it and systemd warns
# about PIDFile= paths below the legacy directory.  The path given to
# --pid-file must stay identical to PIDFile=.
PIDFile=/run/mdadm/mdadm.pid
EnvironmentFile=-/etc/sysconfig/mdmonitor
ExecStart=/sbin/mdadm --monitor --scan -f --pid-file=/run/mdadm/mdadm.pid

[Install]
WantedBy=multi-user.target

135
SOURCES/raid-check Normal file
View File

@ -0,0 +1,135 @@
#!/bin/bash
#
# This script reads its configuration from /etc/sysconfig/raid-check
# Please use that file to enable/disable this script or to set the
# type of check you wish performed.
# We might be on a kernel with no raid support at all, exit if so
[ -f /proc/mdstat ] || exit 0
# and exit if we haven't been set up properly
[ -f /etc/sysconfig/raid-check ] || exit 0
. /etc/sysconfig/raid-check
# Wait until no more than arg1 arrays in arg2 list are busy
#   $1 - number of arrays that may still be busy when we return
#   $2 - space separated list of md device names (as found in /sys/block)
# An array counts as busy while its sync_action is anything other than "idle".
# The list is re-polled every 60 seconds.
waitbusy() {
    local threshold=$(($1 + 1))
    local dev_list="$2"
    local busy dev sync_action
    while true
    do
        busy=0
        for dev in $dev_list; do
            # An array that was stopped or removed while we were waiting no
            # longer has a sync_action file.  It cannot be busy, so skip it;
            # otherwise the empty read would count as busy forever and this
            # loop would never terminate.
            [ -r "/sys/block/$dev/md/sync_action" ] || continue
            sync_action=$(cat "/sys/block/$dev/md/sync_action" 2>/dev/null)
            if [ "$sync_action" != "idle" ]; then
                busy=$((busy + 1))
            fi
        done
        [ "$busy" -lt "$threshold" ] && break
        sleep 60
    done
}
# Bail out quietly unless checking is enabled and CHECK names a valid operation
if [ "$ENABLED" != "yes" ]; then
    exit 0
fi
case "$CHECK" in
    check|repair) ;;
    *) exit 0 ;;
esac
# Translate NICE into renice/ionice arguments.  Any other value (including
# "normal") leaves both empty, in which case no priority is changed later on.
renice=""
ionice=""
case $NICE in
    high)
        renice="-n -5"
        ;;
    low)
        renice="-n 5"
        ionice="-c2 -n7"
        ;;
    idle)
        renice="-n 15"
        ionice="-c3"
        ;;
esac
# Collect the names of all arrays that /proc/mdstat reports as active
active_list=`grep "^md.*: active" /proc/mdstat | cut -f 1 -d ' '`
[ -z "$active_list" ] && exit 0
# check[] maps each selected array to the action ("check" or "repair") to request
declare -A check
# dev_list: every array we will act on; check_list: the subset that gets "check"
dev_list=""
check_list=""
for dev in $active_list; do
# Honor SKIP_DEVS
echo $SKIP_DEVS | grep -w $dev >&/dev/null && continue
if [ -f /sys/block/$dev/md/sync_action ]; then
# Only perform the checks on idle, healthy arrays, but delay
# actually writing the check field until the next loop so we
# don't switch currently idle arrays to active, which happens
# when two or more arrays are on the same physical disk
array_state=`cat /sys/block/$dev/md/array_state`
if [ "$array_state" != "clean" -a "$array_state" != "active" ]; then
continue
fi
sync_action=`cat /sys/block/$dev/md/sync_action`
if [ "$sync_action" != idle ]; then
continue
fi
# Per-device overrides: a device listed in both REPAIR_DEVS and CHECK_DEVS
# ends up with "check" because that test runs last; a device listed in
# neither falls back to $CHECK
ck=""
echo $REPAIR_DEVS | grep -w $dev >&/dev/null && ck="repair"
echo $CHECK_DEVS | grep -w $dev >&/dev/null && ck="check"
[ -z "$ck" ] && ck=$CHECK
dev_list="$dev_list $dev"
check[$dev]=$ck
[ "$ck" = "check" ] && check_list="$check_list $dev"
fi
done
# Nothing eligible: exit without touching any array
[ -z "$dev_list" ] && exit 0
# Request the chosen action on each selected array, one after another
for dev in $dev_list; do
#Only run $MAXCONCURRENT checks at a time
if [ -n "$MAXCONCURRENT" ]; then
waitbusy $((MAXCONCURRENT - 1)) "$dev_list"
fi
# Writing "check" or "repair" to sync_action requests that action
echo "${check[$dev]}" > /sys/block/$dev/md/sync_action
# Look for the array's "[<dev>_resync]" entry in the ps output so that its
# priority can be adjusted; poll every 6 seconds and give up after 10 tries
resync_pid=""
wait=10
while [ $wait -gt 0 -a -z "$resync_pid" ]; do
sleep 6
let wait--
resync_pid=$(ps -ef | awk -v mddev=$dev 'BEGIN { pattern = "^\\[" mddev "_resync]$" } $8 ~ pattern { print $2 }')
done
# Apply the CPU and I/O priorities derived from NICE, but only when the
# process was found and the corresponding setting is non-empty
[ -n "$resync_pid" -a -n "$renice" ] &&
renice $renice -p $resync_pid >&/dev/null
[ -n "$resync_pid" -a -n "$ionice" ] &&
ionice $ionice -p $resync_pid >&/dev/null
done
# Only arrays that were given a "check" need their result inspected
if [ -z "$check_list" ]; then
    exit 0
fi
# Block until every checked array has gone back to idle
waitbusy 0 "$check_list"
for dev in $check_list; do
    mismatch_cnt=$(cat /sys/block/$dev/md/mismatch_cnt)
    raid_lvl=$(cat /sys/block/$dev/md/level)
    # Due to the fact that raid1/10 writes in the kernel are unbuffered,
    # a raid1 array can have non-0 mismatch counts even when the
    # array is healthy.  These non-0 counts will only exist in
    # transient data areas where they don't pose a problem.  However,
    # since we can't tell the difference between a non-0 count that
    # is just in transient data or a non-0 count that signifies a
    # real problem, simply don't check the mismatch_cnt on raid1
    # devices as it's providing far too many false positives.  But by
    # leaving the raid1 device in the check list and performing the
    # check, we still catch and correct any bad sectors there might
    # be in the device.
    case "$raid_lvl" in
        raid1|raid10) continue ;;
    esac
    if [ "$mismatch_cnt" -ne 0 ]; then
        echo "WARNING: mismatch_cnt is not 0 on /dev/$dev"
    fi
done

1127
SPECS/mdadm.spec Normal file

File diff suppressed because it is too large Load Diff