import mdadm-4.2-rc1_1.el8

This commit is contained in:
CentOS Sources 2021-11-09 05:04:40 -05:00 committed by Stepan Oksanichenko
parent 88ea55ea34
commit 488abefe10
114 changed files with 101 additions and 9475 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/mdadm-4.1.tar.xz
SOURCES/mdadm-4.2-rc1.tar.xz

View File

@ -1 +1 @@
4bbbd02674ac67dc9773f41aace7677aa5374c1c SOURCES/mdadm-4.1.tar.xz
cceb5f208a2d77a220a68f23d6cab5a0e8704685 SOURCES/mdadm-4.2-rc1.tar.xz

View File

@ -1,39 +0,0 @@
From 0833f9c3dbaaee202b92ea956f9e2decc7b9593a Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@profitbricks.com>
Date: Tue, 6 Nov 2018 15:27:42 +0100
Subject: [RHEL7.7 PATCH 01/24] Assemble: keep MD_DISK_FAILFAST and
MD_DISK_WRITEMOSTLY flag
Before updating superblock of slave disks, desired_state value
is set for the target state of the slave disks. But it forgets
to check MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY flags. Then
start_arrays() calls ADD_NEW_DISK ioctl-call and pass the state
without MD_DISK_FAILFAST and MD_DISK_WRITEMOSTLY.
Currently it does not cause any problem because the kernel does not
care about the MD_DISK_FAILFAST or MD_DISK_WRITEMOSTLY flags.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@profitbricks.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/Assemble.c b/Assemble.c
index a79466c..f39c9e1 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1704,6 +1704,9 @@ try_again:
else
desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
+ desired_state |= devices[j].i.disk.state & ((1<<MD_DISK_FAILFAST) |
+ (1<<MD_DISK_WRITEMOSTLY));
+
if (!devices[j].uptodate)
continue;
--
2.7.5

View File

@ -0,0 +1,48 @@
From 83b3de7795d2a421eb6ae4ab97656a250bb898ea Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Fri, 23 Apr 2021 14:01:30 +0800
Subject: [PATCH 1/2] Fix some building errors
There are some build errors when warnings are treated as errors.
Fix them in this patch.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index be0313d..5469912 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -3192,7 +3192,7 @@ static int imsm_create_metadata_checkpoint_update(
}
(*u)->type = update_general_migration_checkpoint;
(*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
- dprintf("prepared for %llu\n", (*u)->curr_migr_unit);
+ dprintf("prepared for %llu\n", (unsigned long long)(*u)->curr_migr_unit);
return update_memory_size;
}
@@ -11127,7 +11127,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
skipped_disks++;
continue;
}
- if (read(dl_disk->fd, buf, unit_len) != unit_len) {
+ if (read(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
pr_err("Cannot read copy area block: %s\n",
strerror(errno));
skipped_disks++;
@@ -11139,7 +11139,7 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
skipped_disks++;
continue;
}
- if (write(dl_disk->fd, buf, unit_len) != unit_len) {
+ if (write(dl_disk->fd, buf, unit_len) != (ssize_t)unit_len) {
pr_err("Cannot restore block: %s\n",
strerror(errno));
skipped_disks++;
--
2.7.5

View File

@ -1,77 +0,0 @@
From 6b6112842030309c297a521918d1a2e982426fa3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 9 Nov 2018 17:12:33 +1100
Subject: [RHEL7.7 PATCH 02/24] Document PART-POLICY lines
PART-POLICY has been accepted in mdadm.conf since the same
time that POLICY was accepted, but it was never documented.
So add the missing documentation.
Also fix a bug which would have stopped it from working if
anyone had ever tried to use it.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.conf.5 | 24 +++++++++++++++++++++++-
policy.c | 2 +-
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 18512cb..47c962a 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -501,7 +501,7 @@ To update hot plug configuration it is necessary to execute
.B mdadm \-\-udev\-rules
command after changing the config file
-Key words used in the
+Keywords used in the
.I POLICY
line and supported values are:
@@ -565,6 +565,28 @@ be automatically added to that array (or it's container)
as above and the disk will become a spare in remaining cases
.RE
+.TP
+.B PART-POLICY
+This is similar to
+.B POLICY
+and accepts the same keyword assignments. It allows a consistent set
+of policies to applied to each of the partitions of a device.
+
+A
+.B PART-POLICY
+line should set
+.I type=disk
+and identify the path to one or more disk devices. Each partition on
+these disks will be treated according to the
+.I action=
+setting from this line. If a
+.I domain
+is set in the line, then the domain associated with each patition will
+be based on the domain, but with
+.RB \(dq -part N\(dq
+appended, when N is the partition number for the partition that was
+found.
+
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
.br
diff --git a/policy.c b/policy.c
index c0d18a7..258f393 100644
--- a/policy.c
+++ b/policy.c
@@ -300,7 +300,7 @@ static int path_has_part(char *path, char **part)
l--;
if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
return 0;
- *part = path+l-4;
+ *part = path+l-5;
return 1;
}
--
2.7.5

View File

@ -0,0 +1,36 @@
From 0530e2e0d8c9ecb5171e70bc48e1a6566f317378 Mon Sep 17 00:00:00 2001
From: Norbert Szulc <norbert.szulc@intel.com>
Date: Wed, 5 May 2021 13:01:02 +0200
Subject: [PATCH 2/2] Prevent user from using --stop with ambiguous args
When both --scan and a device name are passed to the --stop action,
the stop is executed only for the given device and --scan is ignored.
Block the operation when both --scan and a device name are passed.
Signed-off-by: Norbert Szulc <norbert.szulc@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@linux.intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mdadm.c b/mdadm.c
index 9a4317d..dcc26ba 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -2041,6 +2041,11 @@ static int misc_list(struct mddev_dev *devlist,
rv |= Manage_run(dv->devname, mdfd, c);
break;
case 'S':
+ if (c->scan) {
+ pr_err("--stop not meaningful with both a --scan assembly and a device name.\n");
+ rv |= 1;
+ break;
+ }
rv |= Manage_stop(dv->devname, mdfd, c->verbose, 0);
break;
case 'o':
--
2.7.5

View File

@ -1,334 +0,0 @@
From cd72f9d114da206baa01fd56ff2d8ffcc08f3239 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 9 Nov 2018 17:12:33 +1100
Subject: [RHEL7.7 PATCH 03/24] policy: support devices with multiple paths.
As new releases of Linux sometimes change the name of
a path, some distros keep "legacy" names as well. This
is useful, but confuses mdadm, which assumes each device has
precisely one path.
So change this assumption: allow a disk to have several
paths, and allow any to match when looking for a policy
which matches a disk.
Reported-and-tested-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Incremental.c | 5 +-
mdadm.h | 2 +-
policy.c | 163 ++++++++++++++++++++++++++++++++--------------------------
3 files changed, 95 insertions(+), 75 deletions(-)
diff --git a/Incremental.c b/Incremental.c
index a4ff7d4..d4d3c35 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -1080,6 +1080,7 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
struct supertype *st2 = NULL;
char *devname = NULL;
unsigned long long devsectors;
+ char *pathlist[2];
if (de->d_ino == 0 || de->d_name[0] == '.' ||
(de->d_type != DT_LNK && de->d_type != DT_UNKNOWN))
@@ -1094,7 +1095,9 @@ static int partition_try_spare(char *devname, int *dfdp, struct dev_policy *pol,
/* This is a partition - skip it */
goto next;
- pol2 = path_policy(de->d_name, type_disk);
+ pathlist[0] = de->d_name;
+ pathlist[1] = NULL;
+ pol2 = path_policy(pathlist, type_disk);
domain_merge(&domlist, pol2, st ? st->ss->name : NULL);
if (domain_test(domlist, pol, st ? st->ss->name : NULL) != 1)
diff --git a/mdadm.h b/mdadm.h
index 387e681..705bd9b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1247,7 +1247,7 @@ extern void policyline(char *line, char *type);
extern void policy_add(char *type, ...);
extern void policy_free(void);
-extern struct dev_policy *path_policy(char *path, char *type);
+extern struct dev_policy *path_policy(char **paths, char *type);
extern struct dev_policy *disk_policy(struct mdinfo *disk);
extern struct dev_policy *devid_policy(int devid);
extern void dev_policy_free(struct dev_policy *p);
diff --git a/policy.c b/policy.c
index 258f393..fa67d55 100644
--- a/policy.c
+++ b/policy.c
@@ -189,15 +189,17 @@ struct dev_policy *pol_find(struct dev_policy *pol, char *name)
return pol;
}
-static char *disk_path(struct mdinfo *disk)
+static char **disk_paths(struct mdinfo *disk)
{
struct stat stb;
int prefix_len;
DIR *by_path;
char symlink[PATH_MAX] = "/dev/disk/by-path/";
- char nm[PATH_MAX];
+ char **paths;
+ int cnt = 0;
struct dirent *ent;
- int rv;
+
+ paths = xmalloc(sizeof(*paths) * (cnt+1));
by_path = opendir(symlink);
if (by_path) {
@@ -214,22 +216,13 @@ static char *disk_path(struct mdinfo *disk)
continue;
if (stb.st_rdev != makedev(disk->disk.major, disk->disk.minor))
continue;
- closedir(by_path);
- return xstrdup(ent->d_name);
+ paths[cnt++] = xstrdup(ent->d_name);
+ paths = xrealloc(paths, sizeof(*paths) * (cnt+1));
}
closedir(by_path);
}
- /* A NULL path isn't really acceptable - use the devname.. */
- sprintf(symlink, "/sys/dev/block/%d:%d", disk->disk.major, disk->disk.minor);
- rv = readlink(symlink, nm, sizeof(nm)-1);
- if (rv > 0) {
- char *dname;
- nm[rv] = 0;
- dname = strrchr(nm, '/');
- if (dname)
- return xstrdup(dname + 1);
- }
- return xstrdup("unknown");
+ paths[cnt] = NULL;
+ return paths;
}
char type_part[] = "part";
@@ -246,18 +239,53 @@ static char *disk_type(struct mdinfo *disk)
return type_disk;
}
-static int pol_match(struct rule *rule, char *path, char *type)
+static int path_has_part(char *path, char **part)
+{
+ /* check if path ends with "-partNN" and
+ * if it does, place a pointer to "-pathNN"
+ * in 'part'.
+ */
+ int l;
+ if (!path)
+ return 0;
+ l = strlen(path);
+ while (l > 1 && isdigit(path[l-1]))
+ l--;
+ if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
+ return 0;
+ *part = path+l-5;
+ return 1;
+}
+
+static int pol_match(struct rule *rule, char **paths, char *type, char **part)
{
- /* check if this rule matches on path and type */
+ /* Check if this rule matches on any path and type.
+ * If 'part' is not NULL, then 'path' must end in -partN, which
+ * we ignore for matching, and return in *part on success.
+ */
int pathok = 0; /* 0 == no path, 1 == match, -1 == no match yet */
int typeok = 0;
- while (rule) {
+ for (; rule; rule = rule->next) {
if (rule->name == rule_path) {
+ char *p;
+ int i;
if (pathok == 0)
pathok = -1;
- if (path && fnmatch(rule->value, path, 0) == 0)
- pathok = 1;
+ if (!paths)
+ continue;
+ for (i = 0; paths[i]; i++) {
+ if (part) {
+ if (!path_has_part(paths[i], &p))
+ continue;
+ *p = '\0';
+ *part = p+1;
+ }
+ if (fnmatch(rule->value, paths[i], 0) == 0)
+ pathok = 1;
+ if (part)
+ *p = '-';
+ }
}
if (rule->name == rule_type) {
if (typeok == 0)
@@ -265,7 +293,6 @@ static int pol_match(struct rule *rule, char *path, char *type)
if (type && strcmp(rule->value, type) == 0)
typeok = 1;
}
- rule = rule->next;
}
return pathok >= 0 && typeok >= 0;
}
@@ -286,24 +313,6 @@ static void pol_merge(struct dev_policy **pol, struct rule *rule)
pol_new(pol, r->name, r->value, metadata);
}
-static int path_has_part(char *path, char **part)
-{
- /* check if path ends with "-partNN" and
- * if it does, place a pointer to "-pathNN"
- * in 'part'.
- */
- int l;
- if (!path)
- return 0;
- l = strlen(path);
- while (l > 1 && isdigit(path[l-1]))
- l--;
- if (l < 5 || strncmp(path+l-5, "-part", 5) != 0)
- return 0;
- *part = path+l-5;
- return 1;
-}
-
static void pol_merge_part(struct dev_policy **pol, struct rule *rule, char *part)
{
/* copy any name assignments from rule into pol, appending
@@ -352,7 +361,7 @@ static int config_rules_has_path = 0;
* path_policy() gathers policy information for the
* disk described in the given a 'path' and a 'type'.
*/
-struct dev_policy *path_policy(char *path, char *type)
+struct dev_policy *path_policy(char **paths, char *type)
{
struct pol_rule *rules;
struct dev_policy *pol = NULL;
@@ -361,27 +370,24 @@ struct dev_policy *path_policy(char *path, char *type)
rules = config_rules;
while (rules) {
- char *part;
+ char *part = NULL;
if (rules->type == rule_policy)
- if (pol_match(rules->rule, path, type))
+ if (pol_match(rules->rule, paths, type, NULL))
pol_merge(&pol, rules->rule);
if (rules->type == rule_part && strcmp(type, type_part) == 0)
- if (path_has_part(path, &part)) {
- *part = 0;
- if (pol_match(rules->rule, path, type_disk))
- pol_merge_part(&pol, rules->rule, part+1);
- *part = '-';
- }
+ if (pol_match(rules->rule, paths, type_disk, &part))
+ pol_merge_part(&pol, rules->rule, part);
rules = rules->next;
}
/* Now add any metadata-specific internal knowledge
* about this path
*/
- for (i=0; path && superlist[i]; i++)
+ for (i=0; paths[0] && superlist[i]; i++)
if (superlist[i]->get_disk_controller_domain) {
const char *d =
- superlist[i]->get_disk_controller_domain(path);
+ superlist[i]->get_disk_controller_domain(
+ paths[0]);
if (d)
pol_new(&pol, pol_domain, d, superlist[i]->name);
}
@@ -400,22 +406,34 @@ void pol_add(struct dev_policy **pol,
pol_dedup(*pol);
}
+static void free_paths(char **paths)
+{
+ int i;
+
+ if (!paths)
+ return;
+
+ for (i = 0; paths[i]; i++)
+ free(paths[i]);
+ free(paths);
+}
+
/*
* disk_policy() gathers policy information for the
* disk described in the given mdinfo (disk.{major,minor}).
*/
struct dev_policy *disk_policy(struct mdinfo *disk)
{
- char *path = NULL;
+ char **paths = NULL;
char *type = disk_type(disk);
struct dev_policy *pol = NULL;
if (config_rules_has_path)
- path = disk_path(disk);
+ paths = disk_paths(disk);
- pol = path_policy(path, type);
+ pol = path_policy(paths, type);
- free(path);
+ free_paths(paths);
return pol;
}
@@ -756,27 +774,26 @@ int policy_check_path(struct mdinfo *disk, struct map_ent *array)
{
char path[PATH_MAX];
FILE *f = NULL;
- char *id_path = disk_path(disk);
- int rv;
+ char **id_paths = disk_paths(disk);
+ int i;
+ int rv = 0;
- if (!id_path)
- return 0;
+ for (i = 0; id_paths[i]; i++) {
+ snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_paths[i]);
+ f = fopen(path, "r");
+ if (!f)
+ continue;
- snprintf(path, PATH_MAX, FAILED_SLOTS_DIR "/%s", id_path);
- f = fopen(path, "r");
- if (!f) {
- free(id_path);
- return 0;
+ rv = fscanf(f, " %s %x:%x:%x:%x\n",
+ array->metadata,
+ array->uuid,
+ array->uuid+1,
+ array->uuid+2,
+ array->uuid+3);
+ fclose(f);
+ break;
}
-
- rv = fscanf(f, " %s %x:%x:%x:%x\n",
- array->metadata,
- array->uuid,
- array->uuid+1,
- array->uuid+2,
- array->uuid+3);
- fclose(f);
- free(id_path);
+ free_paths(id_paths);
return rv == 5;
}
--
2.7.5

View File

@ -1,137 +0,0 @@
From 4199d3c629c14866505923d19fa50017ee92d2e1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 5 Dec 2018 16:35:00 +1100
Subject: [RHEL7.7 PATCH 04/24] mdcheck: add systemd unit files to run mdcheck.
Having the mdcheck script is of no use if it is never run.
This patch adds systemd unit files so that it can easily
be run on the first Sunday of each month for 6 hours,
then on every subsequent morning until the check is
finished.
The units still need to be enabled with
systemctl enable mdcheck_start.timer
The timer will only actually be started when an array
which might need it becomes active.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 5 ++++-
systemd/mdcheck_continue.service | 18 ++++++++++++++++++
systemd/mdcheck_continue.timer | 13 +++++++++++++
systemd/mdcheck_start.service | 17 +++++++++++++++++
systemd/mdcheck_start.timer | 15 +++++++++++++++
5 files changed, 67 insertions(+), 1 deletion(-)
create mode 100644 systemd/mdcheck_continue.service
create mode 100644 systemd/mdcheck_continue.timer
create mode 100644 systemd/mdcheck_start.service
create mode 100644 systemd/mdcheck_start.timer
diff --git a/Makefile b/Makefile
index 2767ac6..afb62cc 100644
--- a/Makefile
+++ b/Makefile
@@ -276,7 +276,10 @@ install-udev: udev-md-raid-arrays.rules udev-md-raid-assembly.rules udev-md-raid
install-systemd: systemd/mdmon@.service
@for file in mdmon@.service mdmonitor.service mdadm-last-resort@.timer \
- mdadm-last-resort@.service mdadm-grow-continue@.service; \
+ mdadm-last-resort@.service mdadm-grow-continue@.service \
+ mdcheck_start.timer mdcheck_start.service \
+ mdcheck_continue.timer mdcheck_continue.service \
+ ; \
do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.2 && \
$(ECHO) $(INSTALL) -D -m 644 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
$(INSTALL) -D -m 644 .install.tmp.2 $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
new file mode 100644
index 0000000..592c607
--- /dev/null
+++ b/systemd/mdcheck_continue.service
@@ -0,0 +1,18 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing - continuation
+ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
+
+[Service]
+Type=oneshot
+Environment= MDADM_CHECK_DURATION='"6 hours"'
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=/usr/share/mdadm/mdcheck --continue --duration $MDADM_CHECK_DURATION
+
diff --git a/systemd/mdcheck_continue.timer b/systemd/mdcheck_continue.timer
new file mode 100644
index 0000000..3ccfd78
--- /dev/null
+++ b/systemd/mdcheck_continue.timer
@@ -0,0 +1,13 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing - continuation
+
+[Timer]
+OnCalendar= 1:05:00
+
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
new file mode 100644
index 0000000..812141b
--- /dev/null
+++ b/systemd/mdcheck_start.service
@@ -0,0 +1,17 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing
+Wants=mdcheck_continue.timer
+
+[Service]
+Type=oneshot
+Environment= MDADM_CHECK_DURATION='"6 hours"'
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=/usr/share/mdadm/mdcheck --duration $MDADM_CHECK_DURATION
diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer
new file mode 100644
index 0000000..6480736
--- /dev/null
+++ b/systemd/mdcheck_start.timer
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=MD array scrubbing
+
+[Timer]
+OnCalendar=Sun *-*-1..7 1:00:00
+
+[Install]
+WantedBy= mdmonitor.service
--
2.7.5

View File

@ -1,83 +0,0 @@
From 7cd7e91ab3de5aa75dc963cb08b0618c1885cf0d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Wed, 5 Dec 2018 16:35:00 +1100
Subject: [RHEL7.7 PATCH 05/24] Monitor: add system timer to run --oneshot
periodically
"mdadm --monitor --oneshot" can be used to get a warning
if there are any degraded arrays. It can be helpful to get
this warning periodically while the condition persists.
This patch adds a systemd service and timer which can
be enabled with
systemctl enable mdmonitor-oneshot.service
and will then provide daily warnings.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 1 +
systemd/mdmonitor-oneshot.service | 15 +++++++++++++++
systemd/mdmonitor-oneshot.timer | 15 +++++++++++++++
3 files changed, 31 insertions(+)
create mode 100644 systemd/mdmonitor-oneshot.service
create mode 100644 systemd/mdmonitor-oneshot.timer
diff --git a/Makefile b/Makefile
index afb62cc..dfe00b0 100644
--- a/Makefile
+++ b/Makefile
@@ -279,6 +279,7 @@ install-systemd: systemd/mdmon@.service
mdadm-last-resort@.service mdadm-grow-continue@.service \
mdcheck_start.timer mdcheck_start.service \
mdcheck_continue.timer mdcheck_continue.service \
+ mdmonitor-oneshot.timer mdmonitor-oneshot.service \
; \
do sed -e 's,BINDIR,$(BINDIR),g' systemd/$$file > .install.tmp.2 && \
$(ECHO) $(INSTALL) -D -m 644 systemd/$$file $(DESTDIR)$(SYSTEMD_DIR)/$$file ; \
diff --git a/systemd/mdmonitor-oneshot.service b/systemd/mdmonitor-oneshot.service
new file mode 100644
index 0000000..fd469b1
--- /dev/null
+++ b/systemd/mdmonitor-oneshot.service
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Reminder for degraded MD arrays
+
+[Service]
+Environment= MDADM_MONITOR_ARGS=--scan
+EnvironmentFile=-/run/sysconfig/mdadm
+ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
+ExecStart=BINDIR/mdadm --monitor --oneshot $MDADM_MONITOR_ARGS
diff --git a/systemd/mdmonitor-oneshot.timer b/systemd/mdmonitor-oneshot.timer
new file mode 100644
index 0000000..cb54bda
--- /dev/null
+++ b/systemd/mdmonitor-oneshot.timer
@@ -0,0 +1,15 @@
+# This file is part of mdadm.
+#
+# mdadm is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+[Unit]
+Description=Reminder for degraded MD arrays
+
+[Timer]
+OnCalendar= 2:00:00
+
+[Install]
+WantedBy= mdmonitor.service
--
2.7.5

View File

@ -1,83 +0,0 @@
From d7a1fda2769ba272d89de6caeab35d52b73a9c3c Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Wed, 17 Oct 2018 12:11:41 +0200
Subject: [RHEL7.7 PATCH 06/24] imsm: update metadata correctly while raid10
double degradation
Mdmon calls end_migration() when map state changes from normal to
degraded. It is not valid because in raid 10 double degradation case
mdmon breaks checkpointing but array is still rebuilding.
In this case mdmon has to mark map as degraded and continues marking
recovery checkpoint in metadata. Migration can be finished only if newly
failed device is a rebuilding device.
Add catching double degraded to degraded transition. Migration is
finished but map state doesn't change, array is still degraded.
Update failed_disk_num correctly. If double degradation
happens, rebuild will start on the lowest slot, but this variable points
to the first failed slot. If a second failure happens during rebuild, this
variable shouldn't be updated until the rebuild has finished.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 6438987..d2035cc 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8136,7 +8136,8 @@ static int mark_failure(struct intel_super *super,
set_imsm_ord_tbl_ent(map2, slot2,
idx | IMSM_ORD_REBUILD);
}
- if (map->failed_disk_num == 0xff)
+ if (map->failed_disk_num == 0xff ||
+ (!is_rebuilding(dev) && map->failed_disk_num > slot))
map->failed_disk_num = slot;
clear_disk_badblocks(super->bbm_log, ord_to_idx(ord));
@@ -8558,13 +8559,25 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
break;
}
if (is_rebuilding(dev)) {
- dprintf_cont("while rebuilding.");
+ dprintf_cont("while rebuilding ");
if (map->map_state != map_state) {
- dprintf_cont(" Map state change");
- end_migration(dev, super, map_state);
+ dprintf_cont("map state change ");
+ if (n == map->failed_disk_num) {
+ dprintf_cont("end migration");
+ end_migration(dev, super, map_state);
+ } else {
+ dprintf_cont("raid10 double degradation, map state change");
+ map->map_state = map_state;
+ }
super->updates_pending++;
- } else if (!rebuild_done) {
+ } else if (!rebuild_done)
break;
+ else if (n == map->failed_disk_num) {
+ /* r10 double degraded to degraded transition */
+ dprintf_cont("raid10 double degradation end migration");
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
}
/* check if recovery is really finished */
@@ -8575,7 +8588,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf("Rebuild has not finished yet, state not changed");
+ dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;
--
2.7.5

View File

@ -1,43 +0,0 @@
From 563ac108659980b3d1e226fe416254a86656235f Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Tue, 6 Nov 2018 16:20:17 +0100
Subject: [RHEL7.7 PATCH 07/24] Assemble: mask FAILFAST and WRITEMOSTLY flags
when finding the most recent device
If devices[].i.disk.state has MD_DISK_FAILFAST or MD_DISK_WRITEMOSTLY
flag, it cannot be the most recent device. Both flags should be masked
before checking the state.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/Assemble.c b/Assemble.c
index f39c9e1..9f75c68 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -578,6 +578,7 @@ static int load_devices(struct devs *devices, char *devmap,
struct supertype *tst;
int i;
int dfd;
+ int disk_state;
if (tmpdev->used != 1)
continue;
@@ -711,7 +712,9 @@ static int load_devices(struct devs *devices, char *devmap,
devices[devcnt].i.disk.major = major(stb.st_rdev);
devices[devcnt].i.disk.minor = minor(stb.st_rdev);
- if (devices[devcnt].i.disk.state == 6) {
+ disk_state = devices[devcnt].i.disk.state & ~((1<<MD_DISK_FAILFAST) |
+ (1<<MD_DISK_WRITEMOSTLY));
+ if (disk_state == ((1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC))) {
if (most_recent < 0 ||
devices[devcnt].i.events
> devices[most_recent].i.events) {
--
2.7.5

View File

@ -1,34 +0,0 @@
From 085df42259cba7863cd6ebe5cd0d8492ac5b869e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 6 Dec 2018 10:35:41 +1100
Subject: [RHEL7.7 PATCH 08/24] Grow: avoid overflow in compute_backup_blocks()
With a chunk size of 16Meg and data drive count of 8,
this calculation can easily overflow the 'int' type that
is used for the multiplications.
So force it to use "long" instead.
Reported-and-tested-by: Ed Spiridonov <edo.rus@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 4436a4d..76f82c0 100644
--- a/Grow.c
+++ b/Grow.c
@@ -1196,7 +1196,8 @@ unsigned long compute_backup_blocks(int nchunk, int ochunk,
/* Find GCD */
a = GCD(a, b);
/* LCM == product / GCD */
- blocks = (ochunk/512) * (nchunk/512) * odata * ndata / a;
+ blocks = (unsigned long)(ochunk/512) * (unsigned long)(nchunk/512) *
+ odata * ndata / a;
return blocks;
}
--
2.7.5

View File

@ -1,30 +0,0 @@
From 76d505dec6c9f92564553596fc8350324be82463 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 6 Dec 2018 10:36:28 +1100
Subject: [RHEL7.7 PATCH 09/24] Grow: report correct new chunk size.
When using "--grow --chunk=" to change chunk
size, the old chunksize is reported instead of the new.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 76f82c0..363b209 100644
--- a/Grow.c
+++ b/Grow.c
@@ -3286,7 +3286,7 @@ static int reshape_array(char *container, int fd, char *devname,
goto release;
} else if (verbose >= 0)
printf("chunk size for %s set to %d\n",
- devname, array.chunk_size);
+ devname, info->new_chunk);
}
unfreeze(st);
return 0;
--
2.7.5

View File

@ -1,31 +0,0 @@
From 467e6a1b4ece8e552ee638dab7f44a4d235ece1a Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Fri, 7 Dec 2018 12:04:44 +0100
Subject: [RHEL7.7 PATCH 10/24] policy.c: prevent NULL pointer referencing
paths could be NULL, so paths[0] must only be accessed after a NULL
pointer check on paths.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
policy.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/policy.c b/policy.c
index fa67d55..e3a0671 100644
--- a/policy.c
+++ b/policy.c
@@ -383,7 +383,7 @@ struct dev_policy *path_policy(char **paths, char *type)
/* Now add any metadata-specific internal knowledge
* about this path
*/
- for (i=0; paths[0] && superlist[i]; i++)
+ for (i=0; paths && paths[0] && superlist[i]; i++)
if (superlist[i]->get_disk_controller_domain) {
const char *d =
superlist[i]->get_disk_controller_domain(
--
2.7.5

View File

@ -1,36 +0,0 @@
From 757e55435997e355ee9b03e5d913b5496a3c39a8 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Tue, 11 Dec 2018 15:04:07 +0100
Subject: [RHEL7.7 PATCH 11/24] policy.c: Fix for compiler error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After cd72f9d(policy: support devices with multiple paths.) compilation
on old compilers fails because "p may be used uninitialized
in this function".
Initialize it with NULL to prevent this.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
policy.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/policy.c b/policy.c
index e3a0671..3c53bd3 100644
--- a/policy.c
+++ b/policy.c
@@ -268,7 +268,7 @@ static int pol_match(struct rule *rule, char **paths, char *type, char **part)
for (; rule; rule = rule->next) {
if (rule->name == rule_path) {
- char *p;
+ char *p = NULL;
int i;
if (pathok == 0)
pathok = -1;
--
2.7.5

View File

@ -1,95 +0,0 @@
From a4e96fd8f3f0b5416783237c1cb6ee87e7eff23d Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Fri, 8 Feb 2019 11:07:10 +0100
Subject: [RHEL7.7 PATCH 12/24] imsm: finish recovery when drive with rebuild
fails
Commit d7a1fda2769b ("imsm: update metadata correctly while raid10 double
degradation") resolves main Imsm double degradation problems but it
omits one case. Now metadata hangs in the rebuilding state if the drive
under rebuild is removed during recovery from double degradation.
The root cause of this problem is comparing new map_state with current
and if they both are degraded assuming that nothing new happens.
Don't rely on map states, just check if device is failed. If the drive
under rebuild fails then finish migration, in other cases update map
state only (second fail means that destination map state can't be normal).
To avoid problems with reassembling move end_migration (called after
double degradation successful recovery) after check if recovery really
finished, for details see (7ce057018 "imsm: fix: rebuild does not
continue after reboot").
Remove redundant code responsible for finishing rebuild process. Function
end_migration does exactly the same. Set last_checkpoint to 0, to prepare
it for the next rebuild.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index d2035cc..38a1b6c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,26 +8560,22 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (is_rebuilding(dev)) {
dprintf_cont("while rebuilding ");
- if (map->map_state != map_state) {
- dprintf_cont("map state change ");
+ if (state & DS_FAULTY) {
+ dprintf_cont("removing failed drive ");
if (n == map->failed_disk_num) {
dprintf_cont("end migration");
end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
} else {
- dprintf_cont("raid10 double degradation, map state change");
+ dprintf_cont("fail detected during rebuild, changing map state");
map->map_state = map_state;
}
super->updates_pending++;
- } else if (!rebuild_done)
- break;
- else if (n == map->failed_disk_num) {
- /* r10 double degraded to degraded transition */
- dprintf_cont("raid10 double degradation end migration");
- end_migration(dev, super, map_state);
- a->last_checkpoint = 0;
- super->updates_pending++;
}
+ if (!rebuild_done)
+ break;
+
/* check if recovery is really finished */
for (mdi = a->info.devs; mdi ; mdi = mdi->next)
if (mdi->recovery_start != MaxSector) {
@@ -8588,7 +8584,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
if (recovery_not_finished) {
dprintf_cont("\n");
- dprintf_cont("Rebuild has not finished yet, map state changes only if raid10 double degradation happens");
+ dprintf_cont("Rebuild has not finished yet");
if (a->last_checkpoint < mdi->recovery_start) {
a->last_checkpoint =
mdi->recovery_start;
@@ -8598,9 +8594,9 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
}
dprintf_cont(" Rebuild done, still degraded");
- dev->vol.migr_state = 0;
- set_migr_type(dev, 0);
- dev->vol.curr_migr_unit = 0;
+ end_migration(dev, super, map_state);
+ a->last_checkpoint = 0;
+ super->updates_pending++;
for (i = 0; i < map->num_members; i++) {
int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
--
2.7.5

View File

@ -1,322 +0,0 @@
From 9f4218274cd4a1e1f356a1617f9a1d09960cf255 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Mon, 28 Jan 2019 17:10:41 +0100
Subject: [RHEL7.7 PATCH 13/24] imsm: fix reshape for >2TB drives
If reshape is performed on drives larger than 2 TB,
the migration checkpoint area that is calculated exceeds a 32-bit value.
This checkpoint area is a reserved space treated as backup
during reshape - at the end of the drive, right before metadata.
As a result - wrong space is used and the data that may exist there
is overwritten.
Adding additional field to migration record to track high order 32-bits
of pba of this area. Three other fields that may exceed 32-bit value
for large drives are added as well.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 149 +++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 107 insertions(+), 42 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 38a1b6c..1cc7d5f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -296,7 +296,7 @@ struct migr_record {
__u32 rec_status; /* Status used to determine how to restart
* migration in case it aborts
* in some fashion */
- __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
+ __u32 curr_migr_unit_lo; /* 0..numMigrUnits-1 */
__u32 family_num; /* Family number of MPB
* containing the RaidDev
* that is migrating */
@@ -306,16 +306,23 @@ struct migr_record {
__u32 dest_depth_per_unit; /* Num member blocks each destMap
* member disk
* advances per unit-of-operation */
- __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
- __u32 dest_1st_member_lba; /* First member lba on first
- * stripe of destination */
- __u32 num_migr_units; /* Total num migration units-of-op */
+ __u32 ckpt_area_pba_lo; /* Pba of first block of ckpt copy area */
+ __u32 dest_1st_member_lba_lo; /* First member lba on first
+ * stripe of destination */
+ __u32 num_migr_units_lo; /* Total num migration units-of-op */
__u32 post_migr_vol_cap; /* Size of volume after
* migration completes */
__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
__u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
* migration ckpt record was read from
* (for recovered migrations) */
+ __u32 curr_migr_unit_hi; /* 0..numMigrUnits-1 high order 32 bits */
+ __u32 ckpt_area_pba_hi; /* Pba of first block of ckpt copy area
+ * high order 32 bits */
+ __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of
+ * destination - high order 32 bits */
+ __u32 num_migr_units_hi; /* Total num migration units-of-op
+ * high order 32 bits */
} __attribute__ ((__packed__));
struct md_list {
@@ -1208,6 +1215,38 @@ static unsigned long long imsm_dev_size(struct imsm_dev *dev)
return join_u32(dev->size_low, dev->size_high);
}
+static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->ckpt_area_pba_lo,
+ migr_rec->ckpt_area_pba_hi);
+}
+
+static unsigned long long current_migr_unit(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->curr_migr_unit_lo,
+ migr_rec->curr_migr_unit_hi);
+}
+
+static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->dest_1st_member_lba_lo,
+ migr_rec->dest_1st_member_lba_hi);
+}
+
+static unsigned long long get_num_migr_units(struct migr_record *migr_rec)
+{
+ if (migr_rec == NULL)
+ return 0;
+ return join_u32(migr_rec->num_migr_units_lo,
+ migr_rec->num_migr_units_hi);
+}
+
static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
@@ -1233,6 +1272,33 @@ static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
split_ull(n, &dev->size_low, &dev->size_high);
}
+static void set_migr_chkp_area_pba(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi);
+}
+
+static void set_current_migr_unit(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->curr_migr_unit_lo,
+ &migr_rec->curr_migr_unit_hi);
+}
+
+static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->dest_1st_member_lba_lo,
+ &migr_rec->dest_1st_member_lba_hi);
+}
+
+static void set_num_migr_units(struct migr_record *migr_rec,
+ unsigned long long n)
+{
+ split_ull(n, &migr_rec->num_migr_units_lo,
+ &migr_rec->num_migr_units_hi);
+}
+
static unsigned long long per_dev_array_size(struct imsm_map *map)
{
unsigned long long array_size = 0;
@@ -1629,12 +1695,14 @@ void convert_to_4k_imsm_migr_rec(struct intel_super *super)
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit /= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba /= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba /= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV);
}
void convert_to_4k_imsm_disk(struct imsm_disk *disk)
@@ -1727,8 +1795,8 @@ void examine_migr_rec_imsm(struct intel_super *super)
printf("Normal\n");
else
printf("Contains Data\n");
- printf(" Current Unit : %u\n",
- __le32_to_cpu(migr_rec->curr_migr_unit));
+ printf(" Current Unit : %llu\n",
+ current_migr_unit(migr_rec));
printf(" Family : %u\n",
__le32_to_cpu(migr_rec->family_num));
printf(" Ascending : %u\n",
@@ -1737,16 +1805,15 @@ void examine_migr_rec_imsm(struct intel_super *super)
__le32_to_cpu(migr_rec->blocks_per_unit));
printf(" Dest. Depth Per Unit : %u\n",
__le32_to_cpu(migr_rec->dest_depth_per_unit));
- printf(" Checkpoint Area pba : %u\n",
- __le32_to_cpu(migr_rec->ckpt_area_pba));
- printf(" First member lba : %u\n",
- __le32_to_cpu(migr_rec->dest_1st_member_lba));
- printf(" Total Number of Units : %u\n",
- __le32_to_cpu(migr_rec->num_migr_units));
- printf(" Size of volume : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap));
- printf(" Expansion space for LBA64 : %u\n",
- __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
+ printf(" Checkpoint Area pba : %llu\n",
+ migr_chkp_area_pba(migr_rec));
+ printf(" First member lba : %llu\n",
+ migr_dest_1st_member_lba(migr_rec));
+ printf(" Total Number of Units : %llu\n",
+ get_num_migr_units(migr_rec));
+ printf(" Size of volume : %llu\n",
+ join_u32(migr_rec->post_migr_vol_cap,
+ migr_rec->post_migr_vol_cap_hi));
printf(" Record was read from : %u\n",
__le32_to_cpu(migr_rec->ckpt_read_disk_num));
@@ -1759,13 +1826,15 @@ void convert_from_4k_imsm_migr_rec(struct intel_super *super)
struct migr_record *migr_rec = super->migr_rec;
migr_rec->blocks_per_unit *= IMSM_4K_DIV;
- migr_rec->ckpt_area_pba *= IMSM_4K_DIV;
- migr_rec->dest_1st_member_lba *= IMSM_4K_DIV;
migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
split_ull((join_u32(migr_rec->post_migr_vol_cap,
migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
&migr_rec->post_migr_vol_cap,
&migr_rec->post_migr_vol_cap_hi);
+ set_migr_chkp_area_pba(migr_rec,
+ migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV);
+ set_migr_dest_1st_member_lba(migr_rec,
+ migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV);
}
void convert_from_4k(struct intel_super *super)
@@ -3096,7 +3165,7 @@ static int imsm_create_metadata_checkpoint_update(
return 0;
}
(*u)->type = update_general_migration_checkpoint;
- (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
+ (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
dprintf("prepared for %u\n", (*u)->curr_migr_unit);
return update_memory_size;
@@ -3397,13 +3466,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info,
case MIGR_GEN_MIGR: {
__u64 blocks_per_unit = blocks_per_migr_unit(super,
dev);
- __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
+ __u64 units = current_migr_unit(migr_rec);
unsigned long long array_blocks;
int used_disks;
if (__le32_to_cpu(migr_rec->ascending_migr) &&
(units <
- (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
+ (get_num_migr_units(migr_rec)-1)) &&
(super->migr_rec->rec_status ==
__cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
units++;
@@ -10697,7 +10766,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
num_migr_units++;
- migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);
+ set_num_migr_units(migr_rec, num_migr_units);
migr_rec->post_migr_vol_cap = dev->size_low;
migr_rec->post_migr_vol_cap_hi = dev->size_high;
@@ -10714,7 +10783,7 @@ void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
min_dev_sectors = dev_sectors;
close(fd);
}
- migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
+ set_migr_chkp_area_pba(migr_rec, min_dev_sectors -
RAID_DISK_RESERVED_BLOCKS_IMSM_HI);
write_imsm_migr_rec(st);
@@ -10765,8 +10834,7 @@ int save_backup_imsm(struct supertype *st,
start = info->reshape_progress * 512;
for (i = 0; i < new_disks; i++) {
- target_offsets[i] = (unsigned long long)
- __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
+ target_offsets[i] = migr_chkp_area_pba(super->migr_rec) * 512;
/* move back copy area adderss, it will be moved forward
* in restore_stripes() using start input variable
*/
@@ -10845,12 +10913,11 @@ int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
if (info->reshape_progress % blocks_per_unit)
curr_migr_unit++;
- super->migr_rec->curr_migr_unit =
- __cpu_to_le32(curr_migr_unit);
+ set_current_migr_unit(super->migr_rec, curr_migr_unit);
super->migr_rec->rec_status = __cpu_to_le32(state);
- super->migr_rec->dest_1st_member_lba =
- __cpu_to_le32(curr_migr_unit *
- __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
+ set_migr_dest_1st_member_lba(super->migr_rec,
+ super->migr_rec->dest_depth_per_unit * curr_migr_unit);
+
if (write_imsm_migr_rec(st) < 0) {
dprintf("imsm: Cannot write migration record outside backup area\n");
return 1;
@@ -10884,8 +10951,8 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
char *buf = NULL;
int retval = 1;
unsigned int sector_size = super->sector_size;
- unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
- unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
+ unsigned long curr_migr_unit = current_migr_unit(migr_rec);
+ unsigned long num_migr_units = get_num_migr_units(migr_rec);
char buffer[20];
int skipped_disks = 0;
@@ -10912,11 +10979,9 @@ int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
map_dest = get_imsm_map(id->dev, MAP_0);
new_disks = map_dest->num_members;
- read_offset = (unsigned long long)
- __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
+ read_offset = migr_chkp_area_pba(migr_rec) * 512;
- write_offset = ((unsigned long long)
- __le32_to_cpu(migr_rec->dest_1st_member_lba) +
+ write_offset = (migr_dest_1st_member_lba(migr_rec) +
pba_of_lba0(map_dest)) * 512;
unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
@@ -12019,12 +12084,12 @@ static int imsm_manage_reshape(
max_position = sra->component_size * ndata;
source_layout = imsm_level_to_layout(map_src->raid_level);
- while (__le32_to_cpu(migr_rec->curr_migr_unit) <
- __le32_to_cpu(migr_rec->num_migr_units)) {
+ while (current_migr_unit(migr_rec) <
+ get_num_migr_units(migr_rec)) {
/* current reshape position [blocks] */
unsigned long long current_position =
__le32_to_cpu(migr_rec->blocks_per_unit)
- * __le32_to_cpu(migr_rec->curr_migr_unit);
+ * current_migr_unit(migr_rec);
unsigned long long border;
/* Check that array hasn't become failed.
--
2.7.5

View File

@ -1,101 +0,0 @@
From ebf3be9931f31df54df52b1821479e6a80a4d9c6 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <xnox@ubuntu.com>
Date: Tue, 15 Jan 2019 19:08:37 +0000
Subject: [RHEL7.7 PATCH 14/24] Fix spelling typos.
Signed-off-by: Dimitri John Ledkov <xnox@ubuntu.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 2 +-
Create.c | 2 +-
Grow.c | 6 +++---
super-ddf.c | 2 +-
super-intel.c | 2 +-
5 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9f75c68..9f050c1 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -879,7 +879,7 @@ static int force_array(struct mdinfo *content,
current_events = devices[chosen_drive].i.events;
add_another:
if (c->verbose >= 0)
- pr_err("forcing event count in %s(%d) from %d upto %d\n",
+ pr_err("forcing event count in %s(%d) from %d up to %d\n",
devices[chosen_drive].devname,
devices[chosen_drive].i.disk.raid_disk,
(int)(devices[chosen_drive].i.events),
diff --git a/Create.c b/Create.c
index 04b1dfc..6f1b228 100644
--- a/Create.c
+++ b/Create.c
@@ -823,7 +823,7 @@ int Create(struct supertype *st, char *mddev,
}
bitmap_fd = open(s->bitmap_file, O_RDWR);
if (bitmap_fd < 0) {
- pr_err("weird: %s cannot be openned\n",
+ pr_err("weird: %s cannot be opened\n",
s->bitmap_file);
goto abort_locked;
}
diff --git a/Grow.c b/Grow.c
index 363b209..6d32661 100644
--- a/Grow.c
+++ b/Grow.c
@@ -446,7 +446,7 @@ int Grow_addbitmap(char *devname, int fd, struct context *c, struct shape *s)
if (offset_setable) {
st->ss->getinfo_super(st, mdi, NULL);
if (sysfs_init(mdi, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
free(mdi);
}
rv = sysfs_set_num_signed(mdi, NULL, "bitmap/location",
@@ -2178,7 +2178,7 @@ size_change_error:
memset(&info, 0, sizeof(info));
info.array = array;
if (sysfs_init(&info, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
rv = 1;
goto release;
}
@@ -2903,7 +2903,7 @@ static int impose_level(int fd, int level, char *devname, int verbose)
struct mdinfo info;
if (sysfs_init(&info, fd, NULL)) {
- pr_err("failed to intialize sysfs.\n");
+ pr_err("failed to initialize sysfs.\n");
return 1;
}
diff --git a/super-ddf.c b/super-ddf.c
index 618542c..c095e8a 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1900,7 +1900,7 @@ static struct vd_config *find_vdcr(struct ddf_super *ddf, unsigned int inst,
return conf;
}
bad:
- pr_err("Could't find disk %d in array %u\n", n, inst);
+ pr_err("Couldn't find disk %d in array %u\n", n, inst);
return NULL;
}
diff --git a/super-intel.c b/super-intel.c
index 1cc7d5f..c399433 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -10034,7 +10034,7 @@ static void imsm_process_update(struct supertype *st,
break;
}
default:
- pr_err("error: unsuported process update type:(type: %d)\n", type);
+ pr_err("error: unsupported process update type:(type: %d)\n", type);
}
}
--
2.7.5

View File

@ -1,46 +0,0 @@
From e3615ecb5b6ad8eb408296878aad5628e0e27166 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 12 Feb 2019 12:53:18 +0800
Subject: [RHEL7.7 PATCH 15/24] Detail.c: do not skip first character when
calling xstrdup in Detail()
'Commit b9c9bd9bacaa ("Detail: ensure --export names are acceptable as
shell variables")' duplicates mdi->sys_name to sysdev string by,
char *sysdev = xstrdup(mdi->sys_name + 1);
which skips the first character of mdi->sys_name. Then when running
mdadm --detail <md device> --export, the output looks like,
MD_DEVICE_ev_sda2_ROLE=1
MD_DEVICE_ev_sda2_DEV=/dev/sda2
The first character of md device (between MD_DEVICE and _ROLE/_DEV)
is dropped. The expected output should be,
MD_DEVICE_dev_sda2_ROLE=1
MD_DEVICE_dev_sda2_DEV=/dev/sda2
This patch removes the '+ 1' from calling xstrdup() in Detail(), which
gets the dropped first character back.
Reported-by: Arvin Schnell <aschnell@suse.com>
Fixes: b9c9bd9bacaa ("Detail: ensure --export names are acceptable as shell variables")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Detail.c b/Detail.c
index b3e857a..20ea03a 100644
--- a/Detail.c
+++ b/Detail.c
@@ -284,7 +284,7 @@ int Detail(char *dev, struct context *c)
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
char *path;
- char *sysdev = xstrdup(mdi->sys_name + 1);
+ char *sysdev = xstrdup(mdi->sys_name);
char *cp;
path = map_dev(mdi->disk.major,
--
2.7.5

View File

@ -1,70 +0,0 @@
From cab114c5ca870e5f1b57fb2602cd9a038271c2e0 Mon Sep 17 00:00:00 2001
From: Corey Hickey <bugfood-c@fatooh.org>
Date: Mon, 11 Feb 2019 17:18:38 -0800
Subject: [RHEL7.7 PATCH 16/24] Fix reshape for decreasing data offset
...when not changing the number of disks.
This patch needs context to explain. These are the relevant parts of
the original code (condensed and annotated):
if (dir > 0) {
/* Increase data offset (reshape backwards) */
if (data_offset < sd->data_offset + min) {
pr_err("--data-offset too small on %s\n",
dn);
goto release;
}
} else {
/* Decrease data offset (reshape forwards) */
if (data_offset < sd->data_offset - min) {
pr_err("--data-offset too small on %s\n",
dn);
goto release;
}
}
When this code is reached, mdadm has already decided on a reshape
direction. When increasing the data offset, the reshape runs backwards
(dir==1); when decreasing the data offset, the reshape runs forwards
(dir==-1).
The conditional within the backwards reshape is correct: the requested
offset must be larger than the old offset plus a minimum delta; thus the
reshape has room to work.
For the forwards reshape, the requested offset needs to be smaller than
the old offset minus a minimum delta; to do this correctly, the
comparison must be reversed.
Also update the error message.
Note: I have tested this change on a RAID 5 on Linux 4.18.0 and verified
that there were no errors from the kernel and that the device data
remained intact. I do not know if there are considerations for different
RAID levels.
Signed-off-by: Corey Hickey <bugfood-c@fatooh.org>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Grow.c b/Grow.c
index 6d32661..764374f 100644
--- a/Grow.c
+++ b/Grow.c
@@ -2613,8 +2613,8 @@ static int set_new_data_offset(struct mdinfo *sra, struct supertype *st,
goto release;
}
if (data_offset != INVALID_SECTORS &&
- data_offset < sd->data_offset - min) {
- pr_err("--data-offset too small on %s\n",
+ data_offset > sd->data_offset - min) {
+ pr_err("--data-offset too large on %s\n",
dn);
goto release;
}
--
2.7.5

View File

@ -1,100 +0,0 @@
From 76b906d2406cdf136f64de77e881eb2d180108d9 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Fri, 7 Dec 2018 14:30:09 +0100
Subject: [RHEL7.7 PATCH 17/24] mdadm/tests: add one test case for failfast of
raid1
This creates a raid1 device with the failfast option and checks that all
slaves have the failfast flag. It then assembles and grows
the raid1 device and checks that failfast works fine.
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
tests/05r1-failfast | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 tests/05r1-failfast
diff --git a/tests/05r1-failfast b/tests/05r1-failfast
new file mode 100644
index 0000000..823dd6f
--- /dev/null
+++ b/tests/05r1-failfast
@@ -0,0 +1,74 @@
+
+# create a simple mirror and check failfast flag works
+mdadm -CR $md0 -e1.2 --level=raid1 --failfast -n2 $dev0 $dev1
+check raid1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Removing works with the failfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+if grep -v failfast /sys/block/md0/md/rd1/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Adding works with the failfast flag
+mdadm $md0 -a --failfast $dev0
+check wait
+if grep -v failfast /sys/block/md0/md/rd0/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+mdadm -S $md0
+
+# Assembling works with the failfast flag
+mdadm -A $md0 $dev0 $dev1
+check raid1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Adding works with the nofailfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+mdadm $md0 -a --nofailfast $dev0
+check wait
+if grep failfast /sys/block/md0/md/rd0/state > /dev/null
+then
+ die "failfast should be missing"
+fi
+
+# Assembling with one faulty slave works with the failfast flag
+mdadm $md0 -f $dev0
+mdadm $md0 -r $dev0
+mdadm -S $md0
+mdadm -A $md0 $dev0 $dev1
+check raid1
+mdadm -S $md0
+
+# Spare works with the failfast flag
+mdadm -CR $md0 -e1.2 --level=raid1 --failfast -n2 $dev0 $dev1
+check raid1
+mdadm $md0 -a --failfast $dev2
+check wait
+check spares 1
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+
+# Grow works with the failfast flag
+mdadm -G $md0 --raid-devices=3
+check wait
+if grep -v failfast /sys/block/md0/md/rd*/state > /dev/null
+then
+ die "failfast missing"
+fi
+mdadm -S $md0
+
+exit 0
--
2.7.5

View File

@ -1,50 +0,0 @@
From 69d084784de196acec8ab703cd1b379af211d624 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Fri, 22 Feb 2019 10:15:45 +0100
Subject: [RHEL7.7 PATCH 18/24] mdmon: don't attempt to manage new arrays when
terminating
When mdmon gets a SIGTERM, it stops managing arrays that are clean. If
there is more than one array in the container and one of them is dirty
and the clean one is still present in mdstat, mdmon will treat it as a
new array and start managing it again. This leads to a cycle of
remove_old() / manage_new() calls for the clean array, until the other
one also becomes clean.
Prevent this by not calling manage_new() if sigterm is set. Also, remove
a check for sigterm in manage_new() because the condition will never be
true.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
managemon.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/managemon.c b/managemon.c
index 101231c..29b91ba 100644
--- a/managemon.c
+++ b/managemon.c
@@ -727,9 +727,7 @@ static void manage_new(struct mdstat_ent *mdstat,
dprintf("inst: %s action: %d state: %d\n", inst,
new->action_fd, new->info.state_fd);
- if (sigterm)
- new->info.safe_mode_delay = 1;
- else if (mdi->safe_mode_delay >= 50)
+ if (mdi->safe_mode_delay >= 50)
/* Normal start, mdadm set this. */
new->info.safe_mode_delay = mdi->safe_mode_delay;
else
@@ -803,7 +801,7 @@ void manage(struct mdstat_ent *mdstat, struct supertype *container)
break;
}
}
- if (a == NULL || !a->container)
+ if ((a == NULL || !a->container) && !sigterm)
manage_new(mdstat, container, a);
}
}
--
2.7.5

View File

@ -1,58 +0,0 @@
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 22 Feb 2019 13:30:27 +0100
Subject: [RHEL7.7 PATCH 19/24] mdmon: wait for previous mdmon to exit during
takeover
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
early"), safe_mode_delay is set properly by initrd mdmon. But in some
cases with filesystem traffic since the very start of the system, it
might take a while to transition to clean state. Due to the fact that the new
mdmon does not wait for the old one to exit - it might happen that the
new one switches safe_mode_delay back to seconds, before the old one exits.
As a result two mdmons are running concurrently on the same array.
Wait for the old mdmon to exit by pinging it with SIGUSR1 signal, just
in case it is sleeping.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdmon.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/mdmon.c b/mdmon.c
index 0955fcc..ff985d2 100644
--- a/mdmon.c
+++ b/mdmon.c
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
int fd;
int n;
long fl;
+ int rv;
/* first rule of survival... don't off yourself */
if (pid == getpid())
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
fl &= ~O_NONBLOCK;
fcntl(sock, F_SETFL, fl);
n = read(sock, buf, 100);
- /* Ignore result, it is just the wait that
- * matters
- */
+
+ /* If there is I/O going on it might took some time to get to
+ * clean state. Wait for monitor to exit fully to avoid races.
+ * Ping it with SIGUSR1 in case that it is sleeping */
+ for (n = 0; n < 25; n++) {
+ rv = kill(pid, SIGUSR1);
+ if (rv < 0)
+ break;
+ usleep(200000);
+ }
}
void remove_pidfile(char *devname)
--
2.7.5

View File

@ -1,52 +0,0 @@
From 2b57e4fe041d52ae29866c93a878a11c07223cff Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 22 Feb 2019 12:56:27 +0100
Subject: [RHEL7.7 PATCH 20/24] Assemble: Fix starting array with initial
reshape checkpoint
If array was stopped during reshape initialization,
there might be a "0" checkpoint recorded in metadata.
If array with such condition (reshape with position 0)
is passed to kernel - it will refuse to start such array.
Treat such array as normal during assemble, Grow_continue() will
reinitialize and start the reshape.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 9f050c1..420c7b3 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -2061,8 +2061,22 @@ int assemble_container_content(struct supertype *st, int mdfd,
spare, &c->backup_file, c->verbose) == 1)
return 1;
- err = sysfs_set_str(content, NULL,
- "array_state", "readonly");
+ if (content->reshape_progress == 0) {
+ /* If reshape progress is 0 - we are assembling the
+ * array that was stopped, before reshape has started.
+ * Array needs to be started as active, Grow_continue()
+ * will start the reshape.
+ */
+ sysfs_set_num(content, NULL, "reshape_position",
+ MaxSector);
+ err = sysfs_set_str(content, NULL,
+ "array_state", "active");
+ sysfs_set_num(content, NULL, "reshape_position", 0);
+ } else {
+ err = sysfs_set_str(content, NULL,
+ "array_state", "readonly");
+ }
+
if (err)
return 1;
--
2.7.5

View File

@ -1,59 +0,0 @@
From 227aeaa872d4898273cf87a4253898823d556c43 Mon Sep 17 00:00:00 2001
From: Corey Hickey <bugfood-c@fatooh.org>
Date: Mon, 11 Feb 2019 17:42:27 -0800
Subject: [RHEL7.7 PATCH 21/24] add missing units to --examine
Within the output of "mdadm --examine", there are three sizes reported
on adjacent lines. For example:
$ sudo mdadm --examine /dev/md3
[...]
Avail Dev Size : 17580545024 (8383.06 GiB 9001.24 GB)
Array Size : 17580417024 (16765.99 GiB 18002.35 GB)
Used Dev Size : 11720278016 (5588.66 GiB 6000.78 GB)
[...]
This can be confusing, since the first and third line are in 512-byte
sectors, and the second is in KiB.
Add units to avoid ambiguity.
(I don't particularly like the "KiB" notation, but it is at least
unambiguous.)
Signed-off-by: Corey Hickey <bugfood-c@fatooh.org>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super1.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super1.c b/super1.c
index 636a286..b85dc20 100644
--- a/super1.c
+++ b/super1.c
@@ -360,7 +360,7 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Raid Level : %s\n", c?c:"-unknown-");
printf(" Raid Devices : %d\n", __le32_to_cpu(sb->raid_disks));
printf("\n");
- printf(" Avail Dev Size : %llu%s\n",
+ printf(" Avail Dev Size : %llu sectors%s\n",
(unsigned long long)__le64_to_cpu(sb->data_size),
human_size(__le64_to_cpu(sb->data_size)<<9));
if (__le32_to_cpu(sb->level) > 0) {
@@ -378,11 +378,11 @@ static void examine_super1(struct supertype *st, char *homehost)
if (ddsks) {
long long asize = __le64_to_cpu(sb->size);
asize = (asize << 9) * ddsks / ddsks_denom;
- printf(" Array Size : %llu%s\n",
+ printf(" Array Size : %llu KiB%s\n",
asize >> 10, human_size(asize));
}
if (sb->size != sb->data_size)
- printf(" Used Dev Size : %llu%s\n",
+ printf(" Used Dev Size : %llu sectors%s\n",
(unsigned long long)__le64_to_cpu(sb->size),
human_size(__le64_to_cpu(sb->size)<<9));
}
--
2.7.5

View File

@ -1,117 +0,0 @@
From 05501181f18cdccdb0b3cec1d8cf59f0995504d7 Mon Sep 17 00:00:00 2001
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
Date: Fri, 8 Mar 2019 12:19:11 +0100
Subject: [RHEL7.7 PATCH 22/24] imsm: fix spare activation for old matrix
arrays
During spare activation get_extents() calculates metadata reserved space based
on smallest active RAID member or it will take the defaults. Since patch
611d9529("imsm: change reserved space to 4MB") default is extended. If array
was created prior to that patch, reserved space is smaller. In case of matrix
RAID - spare is activated in each array one-by-one, so it is spare for first
activation, but treated as "active" during second one.
In case of adding spare drive to old matrix RAID with the size the same as
already existing member drive the routine will take the defaults during second
run and mdmon will refuse to rebuild second volume, claiming that the drive
does not have enough free space.
Add parameter to get_extents(), so that during spare activation reserved space
is always based on smallest active drive - even if given drive is already
active in some other array of matrix RAID.
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index c399433..5a7c9f8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1313,7 +1313,8 @@ static unsigned long long per_dev_array_size(struct imsm_map *map)
return array_size;
}
-static struct extent *get_extents(struct intel_super *super, struct dl *dl)
+static struct extent *get_extents(struct intel_super *super, struct dl *dl,
+ int get_minimal_reservation)
{
/* find a list of used extents on the given physical device */
struct extent *rv, *e;
@@ -1325,7 +1326,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl)
* regardless of whether the OROM has assigned sectors from the
* IMSM_RESERVED_SECTORS region
*/
- if (dl->index == -1)
+ if (dl->index == -1 || get_minimal_reservation)
reservation = imsm_min_reserved_sectors(super);
else
reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -1386,7 +1387,7 @@ static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
if (dl->index == -1)
return MPB_SECTOR_CNT;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
@@ -1478,7 +1479,7 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super)
return rv;
/* find last lba used by subarrays on the smallest active disk */
- e = get_extents(super, dl_min);
+ e = get_extents(super, dl_min, 0);
if (!e)
return rv;
for (i = 0; e[i].size; i++)
@@ -1519,7 +1520,7 @@ int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
if (!dl)
return -EINVAL;
/* find last lba used by subarrays */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
return -EINVAL;
for (i = 0; e[i].size; i++)
@@ -7203,7 +7204,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
pos = 0;
i = 0;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e) continue;
do {
unsigned long long esize;
@@ -7261,7 +7262,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
}
/* retrieve the largest free space block */
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
maxsize = 0;
i = 0;
if (e) {
@@ -7359,7 +7360,7 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
if (super->orom && dl->index < 0 && mpb->num_raid_devs)
continue;
- e = get_extents(super, dl);
+ e = get_extents(super, dl, 0);
if (!e)
continue;
for (i = 1; e[i-1].size; i++)
@@ -8846,7 +8847,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
/* Does this unused device have the requisite free space?
* It needs to be able to cover all member volumes
*/
- ex = get_extents(super, dl);
+ ex = get_extents(super, dl, 1);
if (!ex) {
dprintf("cannot get extents\n");
continue;
--
2.7.5

View File

@ -1,94 +0,0 @@
From 22dc741f63e6403d59c2c14f56fd4791265f9bbb Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Mon, 1 Apr 2019 16:53:41 +0200
Subject: [RHEL7.7 PATCH 23/24] Create: Block rounding size to max
When passed size is smaller than chunk, mdadm rounds it to 0 but 0 there
means max available space.
Block it for every metadata. Remove the same check from imsm routine.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 23 ++++++++++++++++++++---
super-intel.c | 5 ++---
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/Create.c b/Create.c
index 6f1b228..292f92a 100644
--- a/Create.c
+++ b/Create.c
@@ -27,6 +27,18 @@
#include "md_p.h"
#include <ctype.h>
+static int round_size_and_verify(unsigned long long *size, int chunk)
+{
+ if (*size == 0)
+ return 0;
+ *size &= ~(unsigned long long)(chunk - 1);
+ if (*size == 0) {
+ pr_err("Size cannot be smaller than chunk.\n");
+ return 1;
+ }
+ return 0;
+}
+
static int default_layout(struct supertype *st, int level, int verbose)
{
int layout = UnSet;
@@ -248,11 +260,14 @@ int Create(struct supertype *st, char *mddev,
pr_err("unknown level %d\n", s->level);
return 1;
}
+
if (s->size == MAX_SIZE)
/* use '0' to mean 'max' now... */
s->size = 0;
if (s->size && s->chunk && s->chunk != UnSet)
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
+
newsize = s->size * 2;
if (st && ! st->ss->validate_geometry(st, s->level, s->layout, s->raiddisks,
&s->chunk, s->size*2,
@@ -267,7 +282,8 @@ int Create(struct supertype *st, char *mddev,
/* default chunk was just set */
if (c->verbose > 0)
pr_err("chunk size defaults to %dK\n", s->chunk);
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
do_default_chunk = 0;
}
}
@@ -413,7 +429,8 @@ int Create(struct supertype *st, char *mddev,
/* default chunk was just set */
if (c->verbose > 0)
pr_err("chunk size defaults to %dK\n", s->chunk);
- s->size &= ~(unsigned long long)(s->chunk - 1);
+ if (round_size_and_verify(&s->size, s->chunk))
+ return 1;
do_default_chunk = 0;
}
}
diff --git a/super-intel.c b/super-intel.c
index 5a7c9f8..2ba045a 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7455,9 +7455,8 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
verbose);
}
- if (size && ((size < 1024) || (*chunk != UnSet &&
- size < (unsigned long long) *chunk))) {
- pr_err("Given size must be greater than 1M and chunk size.\n");
+ if (size && (size < 1024)) {
+ pr_err("Given size must be greater than 1M.\n");
/* Depends on algorithm in Create.c :
* if container was given (dev == NULL) return -1,
* if block device was given ( dev != NULL) return 0.
--
2.7.5

View File

@ -1,31 +0,0 @@
From 3c9b46cf9ae15a9be98fc47e2080bd9494496246 Mon Sep 17 00:00:00 2001
From: Liwei Song <liwei.song@windriver.com>
Date: Tue, 19 Mar 2019 23:51:05 -0400
Subject: [RHEL7.7 PATCH 24/24] udev: Add udev rules to create by-partuuid for
md device
This rule creates a link under /dev/disk/by-partuuid/ for each
MD device partition, which makes it possible to specify
root=PARTUUID=XXX to boot the rootfs.
Signed-off-by: Liwei Song <liwei.song@windriver.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 1 +
1 file changed, 1 insertion(+)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index c95ec7b..5b99d58 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -30,6 +30,7 @@ IMPORT{builtin}="blkid"
OPTIONS+="link_priority=100"
OPTIONS+="watch"
ENV{ID_FS_USAGE}=="filesystem|other|crypto", ENV{ID_FS_UUID_ENC}=="?*", SYMLINK+="disk/by-uuid/$env{ID_FS_UUID_ENC}"
+ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_PART_ENTRY_UUID}=="?*", SYMLINK+="disk/by-partuuid/$env{ID_PART_ENTRY_UUID}"
ENV{ID_FS_USAGE}=="filesystem|other", ENV{ID_FS_LABEL_ENC}=="?*", SYMLINK+="disk/by-label/$env{ID_FS_LABEL_ENC}"
ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
--
2.7.5

View File

@ -1,109 +0,0 @@
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 7 May 2019 16:08:47 +0200
Subject: [RHEL7.8 PATCH V2 25/47] mdmon: fix wrong array state when disk fails
during mdmon startup
If a member drive disappears and is set faulty by the kernel during
mdmon startup, after ss->load_container() but before manage_new(), mdmon
will try to readd the faulty drive to the array and start rebuilding.
Metadata on the active drive is updated, but the faulty drive is not
removed from the array and is left in a "blocked" state and any write
request to the array will block. If the faulty drive reappears in the
system e.g. after a reboot, the array will not assemble because metadata
on the drives will be incompatible (at least on imsm).
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
is an extension for the "GET_DEVS" option and causes all member devices
to be returned, even if the associated block device has been removed.
Use this option in manage_new() to include the faulty device on the
active_array's devices list. Mdmon will then properly remove the faulty
device from the array and update the metadata to reflect the degraded
state.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
managemon.c | 2 +-
mdadm.h | 1 +
super-intel.c | 2 +-
sysfs.c | 23 ++++++++++++++---------
4 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/managemon.c b/managemon.c
index 29b91ba..200cf83 100644
--- a/managemon.c
+++ b/managemon.c
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
mdi = sysfs_read(-1, mdstat->devnm,
GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
- GET_LAYOUT);
+ GET_LAYOUT|GET_DEVS_ALL);
if (!mdi)
return;
diff --git a/mdadm.h b/mdadm.h
index 705bd9b..427cc52 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
GET_ERROR = (1 << 24),
GET_ARRAY_STATE = (1 << 25),
GET_CONSISTENCY_POLICY = (1 << 26),
+ GET_DEVS_ALL = (1 << 27),
};
/* If fd >= 0, get the array it is open on,
diff --git a/super-intel.c b/super-intel.c
index 2ba045a..4fd5e84 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
- if (state & DS_FAULTY) {
+ if (disk && (state & DS_FAULTY)) {
if (mark_failure(super, dev, disk, ord_to_idx(ord)))
super->updates_pending++;
}
diff --git a/sysfs.c b/sysfs.c
index df6fdda..2dd9ab6 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
/* assume this is a stale reference to a hot
* removed device
*/
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ free(dev);
+ continue;
+ }
+ } else {
+ sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
}
- sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
- /* special case check for block devices that can go 'offline' */
- strcpy(dbase, "block/device/state");
- if (load_sys(fname, buf, sizeof(buf)) == 0 &&
- strncmp(buf, "offline", 7) == 0) {
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ /* special case check for block devices that can go 'offline' */
+ strcpy(dbase, "block/device/state");
+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&
+ strncmp(buf, "offline", 7) == 0) {
+ free(dev);
+ continue;
+ }
}
/* finally add this disk to the array */
--
2.7.5

View File

@ -1,212 +0,0 @@
From 4ec389e3f0c1233f5aa2d5b4e63d96e33d2a37f0 Mon Sep 17 00:00:00 2001
From: Roman Sobanski <roman.sobanski@intel.com>
Date: Tue, 2 Jul 2019 13:29:27 +0200
Subject: [RHEL7.8 PATCH V2 26/47] Enable probe_roms to scan more than 6 roms.
In some cases if more than 6 oroms exist, resource for particular
controller may not be found. Change method for storing
adapter_rom_resources from array to list.
Signed-off-by: Roman Sobanski <roman.sobanski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
probe_roms.c | 98 ++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 56 insertions(+), 42 deletions(-)
diff --git a/probe_roms.c b/probe_roms.c
index b0b0883..7ea04c7 100644
--- a/probe_roms.c
+++ b/probe_roms.c
@@ -35,6 +35,9 @@ static const int rom_len = 0xf0000 - 0xc0000; /* option-rom memory region */
static int _sigbus;
static unsigned long rom_align;
+static void roms_deinit(void);
+static int roms_init(void);
+
static void sigbus(int sig)
{
_sigbus = 1;
@@ -75,6 +78,7 @@ void probe_roms_exit(void)
munmap(rom_mem, rom_len);
rom_mem = MAP_FAILED;
}
+ roms_deinit();
}
int probe_roms_init(unsigned long align)
@@ -91,6 +95,9 @@ int probe_roms_init(unsigned long align)
else
return -1;
+ if (roms_init())
+ return -1;
+
if (signal(SIGBUS, sigbus) == SIG_ERR)
rc = -1;
if (rc == 0) {
@@ -131,6 +138,7 @@ struct resource {
unsigned long end;
unsigned long data;
const char *name;
+ struct resource *next;
};
static struct resource system_rom_resource = {
@@ -147,37 +155,7 @@ static struct resource extension_rom_resource = {
.end = 0xeffff,
};
-static struct resource adapter_rom_resources[] = { {
- .name = "Adapter ROM",
- .start = 0xc8000,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-} };
+static struct resource *adapter_rom_resources;
static struct resource video_rom_resource = {
.name = "Video ROM",
@@ -186,8 +164,35 @@ static struct resource video_rom_resource = {
.end = 0xc7fff,
};
+static int roms_init(void)
+{
+ adapter_rom_resources = malloc(sizeof(struct resource));
+ if (adapter_rom_resources == NULL)
+ return 1;
+ adapter_rom_resources->name = "Adapter ROM";
+ adapter_rom_resources->start = 0xc8000;
+ adapter_rom_resources->data = 0;
+ adapter_rom_resources->end = 0;
+ adapter_rom_resources->next = NULL;
+ return 0;
+}
+
+static void roms_deinit(void)
+{
+ struct resource *res;
+
+ res = adapter_rom_resources;
+ while (res) {
+ struct resource *tmp = res;
+
+ res = res->next;
+ free(tmp);
+ }
+}
+
#define ROMSIGNATURE 0xaa55
+
static int romsignature(const unsigned char *rom)
{
const unsigned short * const ptr = (const unsigned short *)rom;
@@ -208,16 +213,14 @@ static int romchecksum(const unsigned char *rom, unsigned long length)
int scan_adapter_roms(scan_fn fn)
{
/* let scan_fn examing each of the adapter roms found by probe_roms */
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
int found;
if (rom_fd < 0)
return 0;
found = 0;
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
- struct resource *res = &adapter_rom_resources[i];
-
+ while (res) {
if (res->start) {
found = fn(isa_bus_to_virt(res->start),
isa_bus_to_virt(res->end),
@@ -226,6 +229,7 @@ int scan_adapter_roms(scan_fn fn)
break;
} else
break;
+ res = res->next;
}
return found;
@@ -241,14 +245,14 @@ void probe_roms(void)
const void *rom;
unsigned long start, length, upper;
unsigned char c;
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
__u16 val=0;
if (rom_fd < 0)
return;
/* video rom */
- upper = adapter_rom_resources[0].start;
+ upper = res->start;
for (start = video_rom_resource.start; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
@@ -283,8 +287,9 @@ void probe_roms(void)
upper = extension_rom_resource.start;
}
+ struct resource *prev_res = res;
/* check for adapter roms on 2k boundaries */
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += rom_align) {
+ for (; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
@@ -308,10 +313,19 @@ void probe_roms(void)
if (!length || start + length > upper || !romchecksum(rom, length))
continue;
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].data = start + (unsigned long) val;
- adapter_rom_resources[i].end = start + length - 1;
+ if (res == NULL) {
+ res = calloc(1, sizeof(struct resource));
+ if (res == NULL)
+ return;
+ prev_res->next = res;
+ }
+
+ res->start = start;
+ res->data = start + (unsigned long)val;
+ res->end = start + length - 1;
- start = adapter_rom_resources[i++].end & ~(rom_align - 1);
+ start = res->end & ~(rom_align - 1);
+ prev_res = res;
+ res = res->next;
}
}
--
2.7.5

View File

@ -1,39 +0,0 @@
From a4f7290c20c2ff78328c9db0b18029165cfb05b2 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 13:26:08 -0400
Subject: [RHEL7.8 PATCH V2 27/47] super-intel: Fix issue with abs() being
irrelevant
gcc9 complains about subtracting unsigned from unsigned and code
assuming the result can be negative.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 4fd5e84..230e164 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2875,7 +2875,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
{
unsigned long long component_size;
unsigned long long dev_size = imsm_dev_size(dev);
- unsigned long long calc_dev_size = 0;
+ long long calc_dev_size = 0;
unsigned int member_disks = imsm_num_data_members(map);
if (member_disks == 0)
@@ -2889,7 +2889,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
* 2048 blocks per each device. If the difference is higher it means
* that array size was expanded and num_data_stripes was not updated.
*/
- if ((unsigned int)abs(calc_dev_size - dev_size) >
+ if (llabs(calc_dev_size - (long long)dev_size) >
(1 << SECT_PER_MB_SHIFT) * member_disks) {
component_size = dev_size / member_disks;
dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
--
2.7.5

View File

@ -1,57 +0,0 @@
From 7039d1f8200b9599b23db5953934fdb43b0442e0 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:15:38 -0400
Subject: [RHEL7.8 PATCH V2 28/47] mdadm.h: Introduced unaligned
{get,put}_unaligned{16,32}()
We need these to avoid gcc9 going all crazy on us.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/mdadm.h b/mdadm.h
index 427cc52..0fa9e1b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -192,6 +192,36 @@ struct dlm_lksb {
#endif /* __KLIBC__ */
/*
+ * Partially stolen from include/linux/unaligned/packed_struct.h
+ */
+struct __una_u16 { __u16 x; } __attribute__ ((packed));
+struct __una_u32 { __u32 x; } __attribute__ ((packed));
+
+static inline __u16 __get_unaligned16(const void *p)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+ return ptr->x;
+}
+
+static inline __u32 __get_unaligned32(const void *p)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+ return ptr->x;
+}
+
+static inline void __put_unaligned16(__u16 val, void *p)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned32(__u32 val, void *p)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *)p;
+ ptr->x = val;
+}
+
+/*
* Check at compile time that something is of a particular type.
* Always evaluates to 1 so you may use it easily in comparisons.
*/
--
2.7.5

View File

@ -1,38 +0,0 @@
From 486720e0c2418e7e2e0a16221f7c42a308622254 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:49:22 -0400
Subject: [RHEL7.8 PATCH V2 29/47] super-intel: Use put_unaligned in split_ull
Shut up some gcc9 errors by using put_unaligned() accessors. Not pretty,
but better than it was.
Also switch to the correct byte-swap macros (__cpu_to_le32).
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 230e164..d7e8a65 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1165,12 +1165,12 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
static __u32 imsm_min_reserved_sectors(struct intel_super *super);
-static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
+static int split_ull(unsigned long long n, void *lo, void *hi)
{
if (lo == 0 || hi == 0)
return 1;
- *lo = __le32_to_cpu((unsigned)n);
- *hi = __le32_to_cpu((unsigned)(n >> 32));
+ __put_unaligned32(__cpu_to_le32((__u32)n), lo);
+ __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
return 0;
}
--
2.7.5

View File

@ -1,345 +0,0 @@
From b06815989179e0f153e44e4336290e655edce9a1 Mon Sep 17 00:00:00 2001
From: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Date: Wed, 10 Jul 2019 13:38:53 +0200
Subject: [RHEL7.8 PATCH V2 30/47] mdadm: load default sysfs attributes after
assemblation
Added new type of line to mdadm.conf which allows to specify values of
sysfs attributes for MD devices that should be loaded after the array is
assembled. Each line is interpreted as list of structures containing
sysname of MD device (md126 etc.) and list of sysfs attributes and their
values.
Signed-off-by: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 12 +++--
Incremental.c | 1 +
config.c | 7 ++-
mdadm.conf.5 | 25 ++++++++++
mdadm.h | 3 ++
sysfs.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 202 insertions(+), 4 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 420c7b3..b2e6914 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1063,9 +1063,12 @@ static int start_array(int mdfd,
mddev, okcnt + sparecnt + journalcnt,
okcnt + sparecnt + journalcnt == 1 ? "" : "s");
if (okcnt < (unsigned)content->array.raid_disks)
- fprintf(stderr, " (out of %d)",
+ fprintf(stderr, " (out of %d)\n",
content->array.raid_disks);
- fprintf(stderr, "\n");
+ else {
+ fprintf(stderr, "\n");
+ sysfs_rules_apply(mddev, content);
+ }
}
if (st->ss->validate_container) {
@@ -1139,6 +1142,7 @@ static int start_array(int mdfd,
rv = ioctl(mdfd, RUN_ARRAY, NULL);
reopen_mddev(mdfd); /* drop O_EXCL */
if (rv == 0) {
+ sysfs_rules_apply(mddev, content);
if (c->verbose >= 0) {
pr_err("%s has been started with %d drive%s",
mddev, okcnt, okcnt==1?"":"s");
@@ -2130,10 +2134,12 @@ int assemble_container_content(struct supertype *st, int mdfd,
pr_err("array %s now has %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
- else
+ else {
+ sysfs_rules_apply(chosen_name, content);
pr_err("Started %s with %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
+ }
if (preexist)
fprintf(stderr, " (%d new)", working);
if (expansion)
diff --git a/Incremental.c b/Incremental.c
index d4d3c35..98dbcd9 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -480,6 +480,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
pr_err("container %s now has %d device%s\n",
chosen_name, info.array.working_disks,
info.array.working_disks == 1?"":"s");
+ sysfs_rules_apply(chosen_name, &info);
wait_for(chosen_name, mdfd);
if (st->ss->external)
strcpy(devnm, fd2devnm(mdfd));
diff --git a/config.c b/config.c
index e14eae0..7592b2d 100644
--- a/config.c
+++ b/config.c
@@ -80,7 +80,8 @@ char DefaultAltConfFile[] = CONFFILE2;
char DefaultAltConfDir[] = CONFFILE2 ".d";
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
- Homehost, HomeCluster, AutoMode, Policy, PartPolicy, LTEnd };
+ Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs,
+ LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
@@ -93,6 +94,7 @@ char *keywords[] = {
[AutoMode] = "auto",
[Policy] = "policy",
[PartPolicy]="part-policy",
+ [Sysfs] = "sysfs",
[LTEnd] = NULL
};
@@ -764,6 +766,9 @@ void conf_file(FILE *f)
case PartPolicy:
policyline(line, rule_part);
break;
+ case Sysfs:
+ sysfsline(line);
+ break;
default:
pr_err("Unknown keyword %s\n", line);
}
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 47c962a..27dbab1 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -587,6 +587,26 @@ be based on the domain, but with
appended, when N is the partition number for the partition that was
found.
+.TP
+.B SYSFS
+The SYSFS line lists custom values of MD device's sysfs attributes which will be
+stored in sysfs after the array is assembled. Multiple lines are allowed and each
+line has to contain the uuid or the name of the device to which it relates.
+.RS 4
+.TP
+.B uuid=
+hexadecimal identifier of MD device. This has to match the uuid stored in the
+superblock.
+.TP
+.B name=
+name of the MD device as was given to
+.I mdadm
+when the array was created. It will be ignored if
+.B uuid
+is not empty.
+.TP
+.RS 7
+
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
.br
@@ -657,6 +677,11 @@ CREATE group=system mode=0640 auto=part\-8
HOMEHOST <system>
.br
AUTO +1.x homehost \-all
+.br
+SYSFS name=/dev/md/raid5 group_thread_cnt=4 sync_speed_max=1000000
+.br
+SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4
+sync_speed_max=1000000
.SH SEE ALSO
.BR mdadm (8),
diff --git a/mdadm.h b/mdadm.h
index 0fa9e1b..c36d7fd 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1322,6 +1322,9 @@ void domain_add(struct domainlist **domp, char *domain);
extern void policy_save_path(char *id_path, struct map_ent *array);
extern int policy_check_path(struct mdinfo *disk, struct map_ent *array);
+extern void sysfs_rules_apply(char *devnm, struct mdinfo *dev);
+extern void sysfsline(char *line);
+
#if __GNUC__ < 3
struct stat64;
#endif
diff --git a/sysfs.c b/sysfs.c
index 2dd9ab6..c313781 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -26,9 +26,22 @@
#include "mdadm.h"
#include <dirent.h>
#include <ctype.h>
+#include "dlink.h"
#define MAX_SYSFS_PATH_LEN 120
+struct dev_sysfs_rule {
+ struct dev_sysfs_rule *next;
+ char *devname;
+ int uuid[4];
+ int uuid_set;
+ struct sysfs_entry {
+ struct sysfs_entry *next;
+ char *name;
+ char *value;
+ } *entry;
+};
+
int load_sys(char *path, char *buf, int len)
{
int fd = open(path, O_RDONLY);
@@ -999,3 +1012,148 @@ int sysfs_wait(int fd, int *msec)
}
return n;
}
+
+int sysfs_rules_apply_check(const struct mdinfo *sra,
+ const struct sysfs_entry *ent)
+{
+ /* Check whether parameter is regular file,
+ * exists and is under specified directory.
+ */
+ char fname[MAX_SYSFS_PATH_LEN];
+ char dname[MAX_SYSFS_PATH_LEN];
+ char resolved_path[PATH_MAX];
+ char resolved_dir[PATH_MAX];
+
+ if (sra == NULL || ent == NULL)
+ return -1;
+
+ snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+
+ if (realpath(fname, resolved_path) == NULL ||
+ realpath(dname, resolved_dir) == NULL)
+ return -1;
+
+ if (strncmp(resolved_dir, resolved_path,
+ strnlen(resolved_dir, PATH_MAX)) != 0)
+ return -1;
+
+ return 0;
+}
+
+static struct dev_sysfs_rule *sysfs_rules;
+
+void sysfs_rules_apply(char *devnm, struct mdinfo *dev)
+{
+ struct dev_sysfs_rule *rules = sysfs_rules;
+
+ while (rules) {
+ struct sysfs_entry *ent = rules->entry;
+ int match = 0;
+
+ if (!rules->uuid_set) {
+ if (rules->devname)
+ match = strcmp(devnm, rules->devname) == 0;
+ } else {
+ match = memcmp(dev->uuid, rules->uuid,
+ sizeof(int[4])) == 0;
+ }
+
+ while (match && ent) {
+ if (sysfs_rules_apply_check(dev, ent) < 0)
+ pr_err("SYSFS: failed to write '%s' to '%s'\n",
+ ent->value, ent->name);
+ else
+ sysfs_set_str(dev, NULL, ent->name, ent->value);
+ ent = ent->next;
+ }
+ rules = rules->next;
+ }
+}
+
+static void sysfs_rule_free(struct dev_sysfs_rule *rule)
+{
+ struct sysfs_entry *entry;
+
+ while (rule) {
+ struct dev_sysfs_rule *tmp = rule->next;
+
+ entry = rule->entry;
+ while (entry) {
+ struct sysfs_entry *tmp = entry->next;
+
+ free(entry->name);
+ free(entry->value);
+ free(entry);
+ entry = tmp;
+ }
+
+ if (rule->devname)
+ free(rule->devname);
+ free(rule);
+ rule = tmp;
+ }
+}
+
+void sysfsline(char *line)
+{
+ struct dev_sysfs_rule *sr;
+ char *w;
+
+ sr = xcalloc(1, sizeof(*sr));
+ for (w = dl_next(line); w != line ; w = dl_next(w)) {
+ if (strncasecmp(w, "name=", 5) == 0) {
+ char *devname = w + 5;
+
+ if (strncmp(devname, "/dev/md/", 8) == 0) {
+ if (sr->devname)
+ pr_err("Only give one device per SYSFS line: %s\n",
+ devname);
+ else
+ sr->devname = xstrdup(devname);
+ } else {
+ pr_err("%s is an invalid name for an md device - ignored.\n",
+ devname);
+ }
+ } else if (strncasecmp(w, "uuid=", 5) == 0) {
+ char *uuid = w + 5;
+
+ if (sr->uuid_set) {
+ pr_err("Only give one uuid per SYSFS line: %s\n",
+ uuid);
+ } else {
+ if (parse_uuid(w + 5, sr->uuid) &&
+ memcmp(sr->uuid, uuid_zero,
+ sizeof(int[4])) != 0)
+ sr->uuid_set = 1;
+ else
+ pr_err("Invalid uuid: %s\n", uuid);
+ }
+ } else {
+ struct sysfs_entry *prop;
+
+ char *sep = strchr(w, '=');
+
+ if (sep == NULL || *(sep + 1) == 0) {
+ pr_err("Cannot parse \"%s\" - ignoring.\n", w);
+ continue;
+ }
+
+ prop = xmalloc(sizeof(*prop));
+ prop->value = xstrdup(sep + 1);
+ *sep = 0;
+ prop->name = xstrdup(w);
+ prop->next = sr->entry;
+ sr->entry = prop;
+ }
+ }
+
+ if (!sr->devname && !sr->uuid_set) {
+ pr_err("Device name not found in sysfs config entry - ignoring.\n");
+ sysfs_rule_free(sr);
+ return;
+ }
+
+ sr->next = sysfs_rules;
+ sysfs_rules = sr;
+}
--
2.7.5

View File

@ -1,34 +0,0 @@
From 452dc4d13a012cdcb05088c0dbc699959c4d6c73 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 6 Aug 2019 16:05:23 +0300
Subject: [RHEL7.8 PATCH V2 31/47] mdadm.h: include sysmacros.h unconditionally
musl libc now also requires sys/sysmacros.h for the major/minor macros.
All supported libc implementations carry sys/sysmacros.h, including
diet-libc, klibc, and uclibc-ng.
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index c36d7fd..d61a9ca 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -45,10 +45,8 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <errno.h>
#include <string.h>
#include <syslog.h>
-#ifdef __GLIBC__
/* Newer glibc requires sys/sysmacros.h directly for makedev() */
#include <sys/sysmacros.h>
-#endif
#ifdef __dietlibc__
#include <strings.h>
/* dietlibc has deprecated random and srandom!! */
--
2.7.5

View File

@ -1,161 +0,0 @@
From d11abe4bd5cad39803726ddff1888674e417bda5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:29 +0800
Subject: [RHEL7.8 PATCH V2 32/47] mdadm: add --no-devices to avoid component
devices detail information
When people assemble a md raid device with a large number of
component devices (e.g. 1500 DASD disks), the raid device detail
information generated by 'mdadm --detail --export $devnode' is very
large. It is because the detail information contains information of
all the component disks (even the missing/failed ones).
In such condition, when udev-md-raid-arrays.rules is triggered and
internally calls "mdadm --detail --no-devices --export $devnode",
user may observe systemd error message "invalid message length". It
is because the following on-stack raw message buffer in systemd code
is not big enough,
systemd/src/libudev/libudev-monitor.c
_public_ struct udev_device *udev_monito ...
struct ucred *cred;
union {
struct udev_monitor_netlink_header nlh;
char raw[8192];
} buf;
Even change size of raw[] from 8KB to larger size, it may still be not
enough for detail message of a md raid device with much larger number of
component devices.
To fix this problem, an extra option '--no-devices' is added (the
original idea is proposed by Neil Brown). When printing detailed
information of a md raid device, if '--no-devices' is specified, then
all component devices information will not be printed, then the output
message size can be restricted to a small number, so even though systemd
only has an 8KB on-stack raw buffer, the md raid array udev rules can work
correctly without failure message.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 24 ++++++++++++++++--------
ReadMe.c | 1 +
mdadm.c | 4 ++++
mdadm.h | 2 ++
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/Detail.c b/Detail.c
index 20ea03a..ad60434 100644
--- a/Detail.c
+++ b/Detail.c
@@ -56,7 +56,7 @@ int Detail(char *dev, struct context *c)
*/
int fd = open(dev, O_RDONLY);
mdu_array_info_t array;
- mdu_disk_info_t *disks;
+ mdu_disk_info_t *disks = NULL;
int next;
int d;
time_t atime;
@@ -280,7 +280,7 @@ int Detail(char *dev, struct context *c)
}
map_free(map);
}
- if (sra) {
+ if (!c->no_devices && sra) {
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
char *path;
@@ -655,12 +655,17 @@ This is pretty boring
printf("\n\n");
}
- if (array.raid_disks)
- printf(" Number Major Minor RaidDevice State\n");
- else
- printf(" Number Major Minor RaidDevice\n");
+ if (!c->no_devices) {
+ if (array.raid_disks)
+ printf(" Number Major Minor RaidDevice State\n");
+ else
+ printf(" Number Major Minor RaidDevice\n");
+ }
}
- free(info);
+
+ /* if --no_devices specified, not print component devices info */
+ if (c->no_devices)
+ goto skip_devices_state;
for (d = 0; d < max_disks * 2; d++) {
char *dv;
@@ -747,6 +752,8 @@ This is pretty boring
if (!c->brief)
printf("\n");
}
+
+skip_devices_state:
if (spares && c->brief && array.raid_disks)
printf(" spares=%d", spares);
if (c->brief && st && st->sb)
@@ -766,8 +773,9 @@ This is pretty boring
!enough(array.level, array.raid_disks, array.layout, 1, avail))
rv = 2;
- free(disks);
out:
+ free(info);
+ free(disks);
close(fd);
free(subarray);
free(avail);
diff --git a/ReadMe.c b/ReadMe.c
index 12ccf83..eaf1042 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -181,6 +181,7 @@ struct option long_options[] = {
/* For Detail/Examine */
{"brief", 0, 0, Brief},
+ {"no-devices",0, 0, NoDevices},
{"export", 0, 0, 'Y'},
{"sparc2.2", 0, 0, Sparc22},
{"test", 0, 0, 't'},
diff --git a/mdadm.c b/mdadm.c
index 25a1abd..1fb8086 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -159,6 +159,10 @@ int main(int argc, char *argv[])
c.brief = 1;
continue;
+ case NoDevices:
+ c.no_devices = 1;
+ continue;
+
case 'Y': c.export++;
continue;
diff --git a/mdadm.h b/mdadm.h
index d61a9ca..43b07d5 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -440,6 +440,7 @@ enum special_options {
NoSharing,
HelpOptions,
Brief,
+ NoDevices,
ManageOpt,
Add,
AddSpare,
@@ -550,6 +551,7 @@ struct context {
int runstop;
int verbose;
int brief;
+ int no_devices;
int force;
char *homehost;
int require_homehost;
--
2.7.5

View File

@ -1,42 +0,0 @@
From 1a52f1fc0266d438c996789d4addbfac999a6139 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:30 +0800
Subject: [RHEL7.8 PATCH V2 33/47] udev: add --no-devices option for calling
'mdadm --detail'
When creating the symlink of an md raid device, the detailed information of
component disks is unnecessary for rule udev-md-raid-arrays.rules. For
md raid devices with huge number of component disks (e.g. 1500 DASD
disks), the detail information of component devices can be very large
and exceed udev monitor's on-stack message buffer.
This patch adds '--no-devices' option when calling mdadm by,
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
Now the detailed output won't include component disks information,
and the error message "invalid message length" reported by systemd can
be removed.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index 5b99d58..d391665 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -17,7 +17,7 @@ TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
-IMPORT{program}="BINDIR/mdadm --detail --export $devnode"
+IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
ENV{DEVTYPE}=="disk", ENV{MD_NAME}=="?*", SYMLINK+="disk/by-id/md-name-$env{MD_NAME}", OPTIONS+="string_escape=replace"
ENV{DEVTYPE}=="disk", ENV{MD_UUID}=="?*", SYMLINK+="disk/by-id/md-uuid-$env{MD_UUID}"
ENV{DEVTYPE}=="disk", ENV{MD_DEVNAME}=="?*", SYMLINK+="md/$env{MD_DEVNAME}"
--
2.7.5

View File

@ -1,44 +0,0 @@
From 91c97c5432028875db5f8abeddb5cb5f31902001 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Mon, 15 Jul 2019 09:25:35 +0200
Subject: [RHEL7.8 PATCH V2 34/47] imsm: close removed drive fd.
When member drive fails, managemon prepares metadata update and adds
the drive to disk_mgmt_list with DISK_REMOVE flag. It fills only
minor and major. It is enough to recognize the device later.
Monitor thread while processing this update will remove the drive from
super only if it is a spare. It never removes failed member from
disks list. As a result, it still keeps an open descriptor to a
non-existent device.
If the removed drive is not a spare, fill fd in the disk_cfg structure
(prepared by managemon), monitor will close fd during freeing it.
Also set this drive fd to -1 in super to avoid double closing because
monitor will close the fd (if needed) while replacing removed drive
in array.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index d7e8a65..a103a3f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -9200,6 +9200,9 @@ static int add_remove_disk_update(struct intel_super *super)
remove_disk_super(super,
disk_cfg->major,
disk_cfg->minor);
+ } else {
+ disk_cfg->fd = disk->fd;
+ disk->fd = -1;
}
}
/* release allocate disk structure */
--
2.7.5

View File

@ -1,46 +0,0 @@
From fd5b09c9a9107f0393ce194c4aac6e7b8f163e85 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 16 Aug 2019 11:06:17 +0200
Subject: [RHEL7.8 PATCH V2 35/47] mdadm: check value returned by snprintf
against errors
GCC 8 checks possible truncation during snprintf more strictly
than GCC 7, which results in compilation errors. To fix this
problem, a check of the snprintf result against errors has been added.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
sysfs.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/sysfs.c b/sysfs.c
index c313781..2995713 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -1023,12 +1023,20 @@ int sysfs_rules_apply_check(const struct mdinfo *sra,
char dname[MAX_SYSFS_PATH_LEN];
char resolved_path[PATH_MAX];
char resolved_dir[PATH_MAX];
+ int result;
if (sra == NULL || ent == NULL)
return -1;
- snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
- snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+ result = snprintf(dname, MAX_SYSFS_PATH_LEN,
+ "/sys/block/%s/md/", sra->sys_name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
+
+ result = snprintf(fname, MAX_SYSFS_PATH_LEN,
+ "%s/%s", dname, ent->name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
if (realpath(fname, resolved_path) == NULL ||
realpath(dname, resolved_dir) == NULL)
--
2.7.5

View File

@ -1,163 +0,0 @@
From 43ebc9105e9dafe5145b3e801c05da4736bf6e02 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Date: Tue, 3 Sep 2019 16:49:01 -0300
Subject: [RHEL7.8 PATCH V2 36/47] mdadm: Introduce new array state 'broken'
for raid0/linear
Currently if a md raid0/linear array gets one or more members removed while
being mounted, kernel keeps showing state 'clean' in the 'array_state'
sysfs attribute. Despite udev signaling the member device is gone, 'mdadm'
cannot issue the STOP_ARRAY ioctl successfully, given the array is mounted.
Nothing else hints that something is wrong (except that the removed devices
don't show properly in the output of mdadm 'detail' command). There is no
other property to be checked, and if user is not performing reads/writes
to the array, even kernel log is quiet and doesn't give a clue about the
missing member.
This patch is the mdadm counterpart of kernel new array state 'broken'.
The 'broken' state mimics the state 'clean' in every aspect, being useful
only to distinguish if an array has some member missing. All necessary
paths in mdadm were changed to deal with 'broken' state, and in case the
tool runs in a kernel that is not updated, it'll work normally, i.e., it
doesn't require the 'broken' state in order to work.
Also, this patch changes the way the array state is shown in the 'detail'
command (for raid0/linear only) - now it takes the 'array_state' sysfs
attribute into account instead of relying only on the MD_SB_CLEAN flag.
Cc: Jes Sorensen <jes.sorensen@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 14 ++++++++++++--
Monitor.c | 8 ++++++--
maps.c | 1 +
mdadm.h | 1 +
mdmon.h | 2 +-
monitor.c | 4 ++--
6 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/Detail.c b/Detail.c
index ad60434..3e61e37 100644
--- a/Detail.c
+++ b/Detail.c
@@ -81,6 +81,7 @@ int Detail(char *dev, struct context *c)
int external;
int inactive;
int is_container = 0;
+ char *arrayst;
if (fd < 0) {
pr_err("cannot open %s: %s\n",
@@ -485,9 +486,18 @@ int Detail(char *dev, struct context *c)
else
st = ", degraded";
+ if (array.state & (1 << MD_SB_CLEAN)) {
+ if ((array.level == 0) ||
+ (array.level == LEVEL_LINEAR))
+ arrayst = map_num(sysfs_array_states,
+ sra->array_state);
+ else
+ arrayst = "clean";
+ } else
+ arrayst = "active";
+
printf(" State : %s%s%s%s%s%s \n",
- (array.state & (1 << MD_SB_CLEAN)) ?
- "clean" : "active", st,
+ arrayst, st,
(!e || (e->percent < 0 &&
e->percent != RESYNC_PENDING &&
e->percent != RESYNC_DELAYED)) ?
diff --git a/Monitor.c b/Monitor.c
index 036103f..b527165 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1055,8 +1055,11 @@ int Wait(char *dev)
}
}
+/* The state "broken" is used only for RAID0/LINEAR - it's the same as
+ * "clean", but used in case the array has one or more members missing.
+ */
static char *clean_states[] = {
- "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+ "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL };
int WaitClean(char *dev, int verbose)
{
@@ -1116,7 +1119,8 @@ int WaitClean(char *dev, int verbose)
rv = read(state_fd, buf, sizeof(buf));
if (rv < 0)
break;
- if (sysfs_match_word(buf, clean_states) <= 4)
+ if (sysfs_match_word(buf, clean_states) <
+ (int)ARRAY_SIZE(clean_states) - 1)
break;
rv = sysfs_wait(state_fd, &delay);
if (rv < 0 && errno != EINTR)
diff --git a/maps.c b/maps.c
index 02a0474..49b7f2c 100644
--- a/maps.c
+++ b/maps.c
@@ -150,6 +150,7 @@ mapping_t sysfs_array_states[] = {
{ "read-auto", ARRAY_READ_AUTO },
{ "clean", ARRAY_CLEAN },
{ "write-pending", ARRAY_WRITE_PENDING },
+ { "broken", ARRAY_BROKEN },
{ NULL, ARRAY_UNKNOWN_STATE }
};
diff --git a/mdadm.h b/mdadm.h
index 43b07d5..c88ceab 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -373,6 +373,7 @@ struct mdinfo {
ARRAY_ACTIVE,
ARRAY_WRITE_PENDING,
ARRAY_ACTIVE_IDLE,
+ ARRAY_BROKEN,
ARRAY_UNKNOWN_STATE,
} array_state;
struct md_bb bb;
diff --git a/mdmon.h b/mdmon.h
index 818367c..b3d72ac 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -21,7 +21,7 @@
extern const char Name[];
enum array_state { clear, inactive, suspended, readonly, read_auto,
- clean, active, write_pending, active_idle, bad_word};
+ clean, active, write_pending, active_idle, broken, bad_word};
enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
diff --git a/monitor.c b/monitor.c
index 81537ed..e0d3be6 100644
--- a/monitor.c
+++ b/monitor.c
@@ -26,7 +26,7 @@
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto",
- "clean", "active", "write-pending", "active-idle", NULL };
+ "clean", "active", "write-pending", "active-idle", "broken", NULL };
static char *sync_actions[] = {
"idle", "reshape", "resync", "recover", "check", "repair", NULL
};
@@ -476,7 +476,7 @@ static int read_and_act(struct active_array *a, fd_set *fds)
a->next_state = clean;
ret |= ARRAY_DIRTY;
}
- if (a->curr_state == clean) {
+ if ((a->curr_state == clean) || (a->curr_state == broken)) {
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
--
2.7.5

View File

@ -1,40 +0,0 @@
From 2c2d9c48d2daf0d78d20494c3779c0f6dc4bfa75 Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Tue, 24 Sep 2019 11:39:24 -0400
Subject: [RHEL7.8 PATCH V2 37/47] mdadm: force a uuid swap on big endian
The code path for metadata 0.90 calls a common routine
fname_from_uuid that uses metadata 1.2. The code expects member
swapuuid to be setup and usable. But it is only setup when using
metadata 1.2. Since metadata 0.90 did not create swapuuid
and set it, the test (st->ss == &super1) ? 1 : st->ss->swapuuid
fails. The swapuuid is set at compile time based on byte order.
Any call based on metadata 0.90 and on big endian processors,
the --export uuid will be incorrect.
Signed-Off-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
util.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/util.c b/util.c
index c26cf5f..64dd409 100644
--- a/util.c
+++ b/util.c
@@ -685,8 +685,12 @@ char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
// work, but can't have it set if we want this printout to match
// all the other uuid printouts in super1.c, so we force swapuuid
// to 1 to make our printout match the rest of super1
+#if __BYTE_ORDER == BIG_ENDIAN
+ return __fname_from_uuid(info->uuid, 1, buf, sep);
+#else
return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
st->ss->swapuuid, buf, sep);
+#endif
}
int check_ext2(int fd, char *name)
--
2.7.5

View File

@ -1,99 +0,0 @@
From e53cb968691d9e40d83caf5570da3bb7b83c64e1 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 31 May 2019 10:10:00 +0800
Subject: [RHEL7.8 PATCH V2 38/47] mdadm/md.4: add the descriptions for bitmap
sysfs nodes
The sysfs nodes under bitmap are not recorded in md.4,
add them based on md.rst and kernel source code.
Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
md.4 | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/md.4 b/md.4
index 3a1d677..e86707a 100644
--- a/md.4
+++ b/md.4
@@ -1101,6 +1101,75 @@ stripe that requires some "prereading". For fairness this defaults to
maximizes sequential-write throughput at the cost of fairness to threads
doing small or random writes.
+.TP
+.B md/bitmap/backlog
+The value stored in the file only has any effect on RAID1 when write-mostly
+devices are active, and write requests to those devices are proceed in the
+background.
+
+This variable sets a limit on the number of concurrent background writes,
+the valid values are 0 to 16383, 0 means that write-behind is not allowed,
+while any other number means it can happen. If there are more write requests
+than the number, new writes will by synchronous.
+
+.TP
+.B md/bitmap/can_clear
+This is for externally managed bitmaps, where the kernel writes the bitmap
+itself, but metadata describing the bitmap is managed by mdmon or similar.
+
+When the array is degraded, bits mustn't be cleared. When the array becomes
+optimal again, bit can be cleared, but first the metadata needs to record
+the current event count. So md sets this to 'false' and notifies mdmon,
+then mdmon updates the metadata and writes 'true'.
+
+There is no code in mdmon to actually do this, so maybe it doesn't even
+work.
+
+.TP
+.B md/bitmap/chunksize
+The bitmap chunksize can only be changed when no bitmap is active, and
+the value should be power of 2 and at least 512.
+
+.TP
+.B md/bitmap/location
+This indicates where the write-intent bitmap for the array is stored.
+It can be "none" or "file" or a signed offset from the array metadata
+- measured in sectors. You cannot set a file by writing here - that can
+only be done with the SET_BITMAP_FILE ioctl.
+
+Write 'none' to 'bitmap/location' will clear bitmap, and the previous
+location value must be write to it to restore bitmap.
+
+.TP
+.B md/bitmap/max_backlog_used
+This keeps track of the maximum number of concurrent write-behind requests
+for an md array, writing any value to this file will clear it.
+
+.TP
+.B md/bitmap/metadata
+This can be 'internal' or 'clustered' or 'external'. 'internal' is set
+by default, which means the metadata for bitmap is stored in the first 256
+bytes of the bitmap space. 'clustered' means separate bitmap metadata are
+used for each cluster node. 'external' means that bitmap metadata is managed
+externally to the kernel.
+
+.TP
+.B md/bitmap/space
+This shows the space (in sectors) which is available at md/bitmap/location,
+and allows the kernel to know when it is safe to resize the bitmap to match
+a resized array. It should big enough to contain the total bytes in the bitmap.
+
+For 1.0 metadata, assume we can use up to the superblock if before, else
+to 4K beyond superblock. For other metadata versions, assume no change is
+possible.
+
+.TP
+.B md/bitmap/time_base
+This shows the time (in seconds) between disk flushes, and is used to looking
+for bits in the bitmap to be cleared.
+
+The default value is 5 seconds, and it should be an unsigned long value.
+
.SS KERNEL PARAMETERS
The md driver recognised several different kernel parameters.
--
2.7.5

View File

@ -1,35 +0,0 @@
From 8063fd0f9e8abd718bd65928c19bc607cee5acd8 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:47:59 +0800
Subject: [RHEL7.8 PATCH V2 39/47] Init devlist as an array
devlist is a string. It changes to an array if there is a disk that
is an sbd disk: if one device is sbd, the script runs devlist=().
This line of code changes devlist from a string to an array. If there is
no sbd device, this line never runs, so devlist remains a string.
The later code needs an array rather than a string, so init devlist
as an array to fix this problem.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/func.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clustermd_tests/func.sh b/clustermd_tests/func.sh
index 642cc96..801d604 100644
--- a/clustermd_tests/func.sh
+++ b/clustermd_tests/func.sh
@@ -39,6 +39,9 @@ fetch_devlist()
devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
fi
# sbd disk cannot use in testing
+ # Init devlist as an array
+ i=''
+ devlist=(${devlist[@]#$i})
for i in ${devlist[@]}
do
sbd -d $i dump &> /dev/null
--
2.7.5

View File

@ -1,31 +0,0 @@
From 611093148574164fcf4f24f8c076d09473f655d7 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:48:00 +0800
Subject: [RHEL7.8 PATCH V2 40/47] Don't need to check recovery after re-add
when no I/O writes to raid
If there is no write I/O between removing member disk and re-add it, there is no
recovery after re-adding member disk.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/02r1_Manage_re-add | 2 --
1 file changed, 2 deletions(-)
diff --git a/clustermd_tests/02r1_Manage_re-add b/clustermd_tests/02r1_Manage_re-add
index dd9c416..d0d13e5 100644
--- a/clustermd_tests/02r1_Manage_re-add
+++ b/clustermd_tests/02r1_Manage_re-add
@@ -9,8 +9,6 @@ check all state UU
check all dmesg
mdadm --manage $md0 --fail $dev0 --remove $dev0
mdadm --manage $md0 --re-add $dev0
-check $NODE1 recovery
-check all wait
check all state UU
check all dmesg
stop_md all $md0
--
2.7.5

View File

@ -1,47 +0,0 @@
From 7bd59e7926c6921121087eb067befaa896c900a4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 18 Sep 2019 15:12:55 +1000
Subject: [RHEL7.8 PATCH V2 41/47] udev: allow for udev attribute reading bug.
There is a bug in udev (which will hopefully get fixed, but
we should allow for it anyway).
When reading a sysfs attribute, it first reads the whole
value of the attribute, then reads again expecting to get
a read of 0 bytes, like you would with an ordinary file.
If the sysfs attribute changed between these two reads, it can
get a mixture of two values.
In particular, if it reads when 'array_state' is changing from
'clear' to 'inactive', it can find the value as "clear\nve".
This causes the test for "|clear|inactive" to fail, so systemd is allowed
to think that the array is ready - when it isn't.
So change the pattern to allow for this by adding a wildcard at
the end.
Also don't allow for an empty string - reading array_state will
never return an empty string - if it exists at all, it will be
non-empty.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index d391665..c8fa8e8 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -14,7 +14,7 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
# never leave state 'inactive'
ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
-ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
--
2.7.5

View File

@ -1,40 +0,0 @@
From b6180160f78f0182b296bdceed6419b26a6fccc7 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Fri, 4 Oct 2019 12:07:28 +0200
Subject: [RHEL7.8 PATCH V2 42/47] imsm: save current_vol number
The imsm container_content routine will set the current_vol index in super
for getting volume information. This flag has never been restored to
original value, later other function may rely on it.
Restore this flag to original value.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index a103a3f..e02bbd7 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7826,6 +7826,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
int sb_errors = 0;
struct dl *d;
int spare_disks = 0;
+ int current_vol = super->current_vol;
/* do not assemble arrays when not all attributes are supported */
if (imsm_check_attributes(mpb->attributes) == 0) {
@@ -7993,6 +7994,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
rest = this;
}
+ super->current_vol = current_vol;
return rest;
}
--
2.7.5

View File

@ -1,50 +0,0 @@
From 1a1ced1e2e64a6b4b349a3fb559f6b39e4cf7103 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 8 Nov 2019 11:59:11 +0100
Subject: [RHEL7.8 PATCH V2 43/47] imsm: allow to specify second volume size
Removed checks which limited second volume size only to max value (the
largest size that fits on all current drives). It is now permitted
to create a second volume with a size lower than the maximum possible.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index e02bbd7..713058c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7298,11 +7298,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
maxsize = merge_extents(super, i);
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
if (maxsize < size || maxsize == 0) {
if (verbose) {
@@ -7393,11 +7390,8 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
}
maxsize = size;
}
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
cnt = 0;
for (dl = super->disks; dl; dl = dl->next)
if (dl->e)
--
2.7.5

View File

@ -1,45 +0,0 @@
From 6636788aaf4ec0cacaefb6e77592e4a68e70a957 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 44/47] mdcheck: when mdcheck_start is enabled,
enable mdcheck_continue too.
mdcheck_continue continues a regular array scan that was started by
mdcheck_start.
mdcheck_start will ensure that mdcheck_continue is active.
However, if you reboot after a check has started, but before it finishes,
then mdcheck_continue won't cause it to continue, because nothing
starts it on boot.
So add an install option for mdcheck_continue, and make sure it
gets enabled when mdcheck_start is enabled.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.timer | 2 ++
systemd/mdcheck_start.timer | 1 +
2 files changed, 3 insertions(+)
diff --git a/systemd/mdcheck_continue.timer b/systemd/mdcheck_continue.timer
index 3ccfd78..dba1074 100644
--- a/systemd/mdcheck_continue.timer
+++ b/systemd/mdcheck_continue.timer
@@ -11,3 +11,5 @@ Description=MD array scrubbing - continuation
[Timer]
OnCalendar= 1:05:00
+[Install]
+WantedBy= mdmonitor.service
diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer
index 6480736..9e7e02a 100644
--- a/systemd/mdcheck_start.timer
+++ b/systemd/mdcheck_start.timer
@@ -13,3 +13,4 @@ OnCalendar=Sun *-*-1..7 1:00:00
[Install]
WantedBy= mdmonitor.service
+Also= mdcheck_continue.timer
--
2.7.5

View File

@ -1,51 +0,0 @@
From 4ca799c581703d4d0ad840833c037c2fff088ca7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 45/47] mdcheck: use ${} to pass variable to mdcheck
$MDADM_CHECK_DURATION allows the value to be split on spaces.
${MDADM_CHECK_DURATION} avoids such splitting.
Making this change removes the need for double quoting when setting
the default Environment, and means that double quoting isn't needed
in the EnvironmentFile.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.service | 5 ++---
systemd/mdcheck_start.service | 4 ++--
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index 592c607..deac695 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,8 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION='"6 hours"'
+Environment= MDADM_CHECK_DURATION="6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
-ExecStart=/usr/share/mdadm/mdcheck --continue --duration $MDADM_CHECK_DURATION
-
+ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index 812141b..f17f1aa 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION='"6 hours"'
+Environment= MDADM_CHECK_DURATION="6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
-ExecStart=/usr/share/mdadm/mdcheck --duration $MDADM_CHECK_DURATION
+ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
--
2.7.5

View File

@ -1,29 +0,0 @@
From 85b83a7920bca5b93d2458f093f2c640a130614c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 46/47] SUSE-mdadm_env.sh: handle
MDADM_CHECK_DURATION
The suse sysconfig/mdadm allows MDADM_CHECK_DURATION
to be set, but it is currently ignored.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/SUSE-mdadm_env.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/systemd/SUSE-mdadm_env.sh b/systemd/SUSE-mdadm_env.sh
index 10b2e74..c13b48a 100644
--- a/systemd/SUSE-mdadm_env.sh
+++ b/systemd/SUSE-mdadm_env.sh
@@ -43,3 +43,6 @@ fi
mkdir -p /run/sysconfig
echo "MDADM_MONITOR_ARGS=$MDADM_RAIDDEVICES $MDADM_DELAY $MDADM_MAIL $MDADM_PROGRAM $MDADM_SCAN $MDADM_SEND_MAIL $MDADM_CONFIG" > /run/sysconfig/mdadm
+if [ -n "$MDADM_CHECK_DURATION" ]; then
+ echo "MDADM_CHECK_DURATION=$MDADM_CHECK_DURATION" >> /run/sysconfig/mdadm
+fi
--
2.7.5

View File

@ -1,122 +0,0 @@
From 761e3bd9f5e3aafa95ad3ae50a637dc67c8774f0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 31 Oct 2019 15:15:38 +1100
Subject: [RHEL7.8 PATCH V2 47/47] super-intel: don't mark structs 'packed'
unnecessarily
super-intel marks a number of structures 'packed', but this
doesn't change the layout - they are already well organized.
This is a problem as gcc warns when code takes the address
of a field in a packed struct - as super-intel sometimes does.
So remove the marking where it isn't needed.
To ensure this doesn't introduce a regression, add a compile-time
assertion that the size of the structure is exactly the value
it had before the 'packed' notation was removed.
Note that a couple of structures do need to be packed.
As the address of fields is never taken, that is safe.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 32 ++++++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 713058c..a7fbed4 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -96,6 +96,19 @@
* mutliple PPL area
*/
+/*
+ * This macro let's us ensure that no-one accidentally
+ * changes the size of a struct
+ */
+#define ASSERT_SIZE(_struct, size) \
+static inline void __assert_size_##_struct(void) \
+{ \
+ switch (0) { \
+ case 0: break; \
+ case (sizeof(struct _struct) == size): break; \
+ } \
+}
+
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
@@ -112,6 +125,7 @@ struct imsm_disk {
#define IMSM_DISK_FILLERS 3
__u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */
};
+ASSERT_SIZE(imsm_disk, 48)
/* map selector for map managment
*/
@@ -146,7 +160,8 @@ struct imsm_map {
__u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
* top byte contains some flags
*/
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_map, 52)
struct imsm_vol {
__u32 curr_migr_unit;
@@ -169,7 +184,8 @@ struct imsm_vol {
__u32 filler[4];
struct imsm_map map[1];
/* here comes another one if migr_state */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_vol, 84)
struct imsm_dev {
__u8 volume[MAX_RAID_SERIAL_LEN];
@@ -220,7 +236,8 @@ struct imsm_dev {
#define IMSM_DEV_FILLERS 3
__u32 filler[IMSM_DEV_FILLERS];
struct imsm_vol vol;
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_dev, 164)
struct imsm_super {
__u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
@@ -248,7 +265,8 @@ struct imsm_super {
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_super, 264)
#define BBM_LOG_MAX_ENTRIES 254
#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
@@ -269,7 +287,8 @@ struct bbm_log {
__u32 signature; /* 0xABADB10C */
__u32 entry_count;
struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
-} __attribute__ ((__packed__));
+};
+ASSERT_SIZE(bbm_log, 2040)
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
@@ -323,7 +342,8 @@ struct migr_record {
* destination - high order 32 bits */
__u32 num_migr_units_hi; /* Total num migration units-of-op
* high order 32 bits */
-} __attribute__ ((__packed__));
+};
+ASSERT_SIZE(migr_record, 64)
struct md_list {
/* usage marker:
--
2.7.5

View File

@ -1,45 +0,0 @@
From e1512e7b7d060f0346738b237ea34eac21b29a26 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Wed, 18 Dec 2019 14:46:21 +0800
Subject: [RHEL8.2 PATCH 1/1] mdcheck service can't start successfully because
of syntax error
It reports error when starting mdcheck_start and mdcheck_continue service.
Invalid environment assignment, ignoring: MDADM_CHECK_DURATION="6 hours"
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.service | 2 +-
systemd/mdcheck_start.service | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index deac695..aa02dde 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,7 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION="6 hours"
+Environment= "MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index f17f1aa..da62d5f 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION="6 hours"
+Environment= "MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
--
2.7.5

View File

@ -1,41 +0,0 @@
From 02af379337c73e751ad97c0fed9123121f8b4289 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Wed, 27 Nov 2019 10:19:54 -0500
Subject: [RHEL8.2 PATCH 49/61] Remove last traces of HOT_ADD_DISK
This ioctl is no longer used, so remove all references to it.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 2 --
md_u.h | 1 -
2 files changed, 3 deletions(-)
diff --git a/Manage.c b/Manage.c
index ffe55f8..deeba2b 100644
--- a/Manage.c
+++ b/Manage.c
@@ -1289,8 +1289,6 @@ int Manage_subdevs(char *devname, int fd,
/* Do something to each dev.
* devmode can be
* 'a' - add the device
- * try HOT_ADD_DISK
- * If that fails EINVAL, try ADD_NEW_DISK
* 'S' - add the device as a spare - don't try re-add
* 'j' - add the device as a journal device
* 'A' - re-add the device
diff --git a/md_u.h b/md_u.h
index 2d66d52..b30893c 100644
--- a/md_u.h
+++ b/md_u.h
@@ -28,7 +28,6 @@
#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
-#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
--
2.7.5

View File

@ -1,51 +0,0 @@
From 9cf361f8791d86aaced821c19af556819bc03732 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Wed, 27 Nov 2019 11:33:15 -0500
Subject: [RHEL8.2 PATCH 50/61] Fix up a few formatting issues
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/Manage.c b/Manage.c
index deeba2b..b22c396 100644
--- a/Manage.c
+++ b/Manage.c
@@ -1728,8 +1728,10 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
int fd2 = open(from_devname, O_RDONLY);
if (fd1 < 0 || fd2 < 0) {
- if (fd1>=0) close(fd1);
- if (fd2>=0) close(fd2);
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
return 0;
}
@@ -1743,7 +1745,8 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
devlist.disposition = 'r';
if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
devlist.disposition = 'a';
- if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+ if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
+ NULL, 0) == 0) {
/* make sure manager is aware of changes */
ping_manager(to_devname);
ping_manager(from_devname);
@@ -1751,7 +1754,9 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
close(fd2);
return 1;
}
- else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+ else
+ Manage_subdevs(from_devname, fd2, &devlist,
+ -1, 0, NULL, 0);
}
close(fd1);
close(fd2);
--
2.7.5

View File

@ -1,26 +0,0 @@
From 4b31846f3f90aa24f883ceed80e91f204c0a9389 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Fri, 29 Nov 2019 17:14:47 +0800
Subject: [RHEL8.2 PATCH 51/61] Remove unused code
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
platform-intel.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/platform-intel.h b/platform-intel.h
index 29c85f1..7cb370e 100644
--- a/platform-intel.h
+++ b/platform-intel.h
@@ -169,7 +169,6 @@ static inline int fls(int x)
r -= 2;
}
if (!(x & 0x80000000u)) {
- x <<= 1;
r -= 1;
}
return r;
--
2.7.5

View File

@ -1,176 +0,0 @@
From b771faef931c798a4553db0a8c1366aff90079c6 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 29 Nov 2019 15:21:08 +0100
Subject: [RHEL8.2 PATCH 52/61] imsm: return correct uuid for volume in detail
Fixes the side effect of the patch b6180160f ("imsm: save current_vol number")
- wrong UUID is printed in detail for each volume.
New parameter "subarray" is added to determine what info should be extracted
from metadata (subarray or container).
The parameter affects only IMSM metadata.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 4 ++--
mdadm.h | 5 +++--
super-ddf.c | 5 +++--
super-intel.c | 20 ++++++++++++++++++--
super0.c | 4 ++--
super1.c | 4 ++--
6 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/Detail.c b/Detail.c
index 3e61e37..24fa462 100644
--- a/Detail.c
+++ b/Detail.c
@@ -623,7 +623,7 @@ This is pretty boring
free_mdstat(ms);
if (st && st->sb)
- st->ss->detail_super(st, c->homehost);
+ st->ss->detail_super(st, c->homehost, subarray);
if (array.raid_disks == 0 && sra &&
sra->array.major_version == -1 &&
@@ -767,7 +767,7 @@ skip_devices_state:
if (spares && c->brief && array.raid_disks)
printf(" spares=%d", spares);
if (c->brief && st && st->sb)
- st->ss->brief_detail_super(st);
+ st->ss->brief_detail_super(st, subarray);
if (st)
st->ss->free_super(st);
diff --git a/mdadm.h b/mdadm.h
index c88ceab..91f1338 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -847,8 +847,9 @@ extern struct superswitch {
/* Used to report details of an active array.
* ->load_super was possibly given a 'component' string.
*/
- void (*detail_super)(struct supertype *st, char *homehost);
- void (*brief_detail_super)(struct supertype *st);
+ void (*detail_super)(struct supertype *st, char *homehost,
+ char *subarray);
+ void (*brief_detail_super)(struct supertype *st, char *subarray);
void (*export_detail_super)(struct supertype *st);
/* Optional: platform hardware / firmware details */
diff --git a/super-ddf.c b/super-ddf.c
index c095e8a..7802063 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1730,7 +1730,8 @@ err:
return 1;
}
-static void detail_super_ddf(struct supertype *st, char *homehost)
+static void detail_super_ddf(struct supertype *st, char *homehost,
+ char *subarray)
{
struct ddf_super *sb = st->sb;
int cnt = be16_to_cpu(sb->virt->populated_vdes);
@@ -1787,7 +1788,7 @@ static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
memcpy(uuid, sha, 4*4);
}
-static void brief_detail_super_ddf(struct supertype *st)
+static void brief_detail_super_ddf(struct supertype *st, char *subarray)
{
struct mdinfo info;
char nbuf[64];
diff --git a/super-intel.c b/super-intel.c
index a7fbed4..86dcb69 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2183,23 +2183,39 @@ err:
return 1;
}
-static void detail_super_imsm(struct supertype *st, char *homehost)
+static void detail_super_imsm(struct supertype *st, char *homehost,
+ char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("\n UUID : %s\n", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
-static void brief_detail_super_imsm(struct supertype *st)
+static void brief_detail_super_imsm(struct supertype *st, char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
+
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf(" UUID=%s", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
static int imsm_read_serial(int fd, char *devname, __u8 *serial);
diff --git a/super0.c b/super0.c
index 42989b9..6b7c0e3 100644
--- a/super0.c
+++ b/super0.c
@@ -348,7 +348,7 @@ err:
return 1;
}
-static void detail_super0(struct supertype *st, char *homehost)
+static void detail_super0(struct supertype *st, char *homehost, char *subarray)
{
mdp_super_t *sb = st->sb;
printf(" UUID : ");
@@ -368,7 +368,7 @@ static void detail_super0(struct supertype *st, char *homehost)
printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
}
-static void brief_detail_super0(struct supertype *st)
+static void brief_detail_super0(struct supertype *st, char *subarray)
{
mdp_super_t *sb = st->sb;
printf(" UUID=");
diff --git a/super1.c b/super1.c
index b85dc20..929466d 100644
--- a/super1.c
+++ b/super1.c
@@ -833,7 +833,7 @@ err:
return 1;
}
-static void detail_super1(struct supertype *st, char *homehost)
+static void detail_super1(struct supertype *st, char *homehost, char *subarray)
{
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
@@ -857,7 +857,7 @@ static void detail_super1(struct supertype *st, char *homehost)
(unsigned long long)__le64_to_cpu(sb->events));
}
-static void brief_detail_super1(struct supertype *st)
+static void brief_detail_super1(struct supertype *st, char *subarray)
{
struct mdp_superblock_1 *sb = st->sb;
int i;
--
2.7.5

View File

@ -1,208 +0,0 @@
From 6da53c0e2aab200605722795798b1e4f2352cd64 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Mon, 2 Dec 2019 10:52:05 +0100
Subject: [RHEL8.2 PATCH 53/61] imsm: Change the way of printing nvme drives in
detail-platform.
Change NVMe controller path to device node path
in mdadm --detail-platform and print serial number.
The method imsm_read_serial always trims the serial to
MAX_RAID_SERIAL_LEN. The added parameter 'serial_buf_len'
is used to check whether the serial fits in the passed
buffer; if it does not, the serial is trimmed.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 97 ++++++++++++++++++++++++++++-------------------------------
1 file changed, 46 insertions(+), 51 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 86dcb69..5c1f759 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2218,7 +2218,8 @@ static void brief_detail_super_imsm(struct supertype *st, char *subarray)
super->current_vol = temp_vol;
}
-static int imsm_read_serial(int fd, char *devname, __u8 *serial);
+static int imsm_read_serial(int fd, char *devname, __u8 *serial,
+ size_t serial_buf_len);
static void fd2devname(int fd, char *name);
static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
@@ -2364,8 +2365,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
else {
fd2devname(fd, buf);
printf(" Port%d : %s", port, buf);
- if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
- printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
+ if (imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)) == 0)
+ printf(" (%s)\n", buf);
else
printf(" ()\n");
close(fd);
@@ -2388,52 +2390,45 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
return err;
}
-static int print_vmd_attached_devs(struct sys_dev *hba)
+static int print_nvme_info(struct sys_dev *hba)
{
+ char buf[1024];
struct dirent *ent;
DIR *dir;
- char path[292];
- char link[256];
- char *c, *rp;
-
- if (hba->type != SYS_DEV_VMD)
- return 1;
+ char *rp;
+ int fd;
- /* scroll through /sys/dev/block looking for devices attached to
- * this hba
- */
- dir = opendir("/sys/bus/pci/drivers/nvme");
+ dir = opendir("/sys/block/");
if (!dir)
return 1;
for (ent = readdir(dir); ent; ent = readdir(dir)) {
- int n;
-
- /* is 'ent' a device? check that the 'subsystem' link exists and
- * that its target matches 'bus'
- */
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem",
- ent->d_name);
- n = readlink(path, link, sizeof(link));
- if (n < 0 || n >= (int)sizeof(link))
- continue;
- link[n] = '\0';
- c = strrchr(link, '/');
- if (!c)
- continue;
- if (strncmp("pci", c+1, strlen("pci")) != 0)
- continue;
-
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
-
- rp = realpath(path, NULL);
- if (!rp)
- continue;
+ if (strstr(ent->d_name, "nvme")) {
+ sprintf(buf, "/sys/block/%s", ent->d_name);
+ rp = realpath(buf, NULL);
+ if (!rp)
+ continue;
+ if (path_attached_to_hba(rp, hba->path)) {
+ fd = open_dev(ent->d_name);
+ if (fd < 0) {
+ free(rp);
+ continue;
+ }
- if (path_attached_to_hba(rp, hba->path)) {
- printf(" NVMe under VMD : %s\n", rp);
+ fd2devname(fd, buf);
+ if (hba->type == SYS_DEV_VMD)
+ printf(" NVMe under VMD : %s", buf);
+ else if (hba->type == SYS_DEV_NVME)
+ printf(" NVMe Device : %s", buf);
+ if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)))
+ printf(" (%s)\n", buf);
+ else
+ printf("()\n");
+ close(fd);
+ }
+ free(rp);
}
- free(rp);
}
closedir(dir);
@@ -2648,7 +2643,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
char buf[PATH_MAX];
printf(" I/O Controller : %s (%s)\n",
vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
- if (print_vmd_attached_devs(hba)) {
+ if (print_nvme_info(hba)) {
if (verbose > 0)
pr_err("failed to get devices attached to VMD domain.\n");
result |= 2;
@@ -2663,7 +2658,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
if (entry->type == SYS_DEV_NVME) {
for (hba = list; hba; hba = hba->next) {
if (hba->type == SYS_DEV_NVME)
- printf(" NVMe Device : %s\n", hba->path);
+ print_nvme_info(hba);
}
printf("\n");
continue;
@@ -4028,11 +4023,11 @@ static int nvme_get_serial(int fd, void *buf, size_t buf_len)
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
static int imsm_read_serial(int fd, char *devname,
- __u8 serial[MAX_RAID_SERIAL_LEN])
+ __u8 *serial, size_t serial_buf_len)
{
char buf[50];
int rv;
- int len;
+ size_t len;
char *dest;
char *src;
unsigned int i;
@@ -4075,13 +4070,13 @@ static int imsm_read_serial(int fd, char *devname,
len = dest - buf;
dest = buf;
- /* truncate leading characters */
- if (len > MAX_RAID_SERIAL_LEN) {
- dest += len - MAX_RAID_SERIAL_LEN;
- len = MAX_RAID_SERIAL_LEN;
+ if (len > serial_buf_len) {
+ /* truncate leading characters */
+ dest += len - serial_buf_len;
+ len = serial_buf_len;
}
- memset(serial, 0, MAX_RAID_SERIAL_LEN);
+ memset(serial, 0, serial_buf_len);
memcpy(serial, dest, len);
return 0;
@@ -4136,7 +4131,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
char name[40];
__u8 serial[MAX_RAID_SERIAL_LEN];
- rv = imsm_read_serial(fd, devname, serial);
+ rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN);
if (rv != 0)
return 2;
@@ -5844,7 +5839,7 @@ int mark_spare(struct dl *disk)
return ret_val;
ret_val = 0;
- if (!imsm_read_serial(disk->fd, NULL, serial)) {
+ if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) {
/* Restore disk serial number, because takeover marks disk
* as failed and adds to serial ':0' before it becomes
* a spare disk.
@@ -5895,7 +5890,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
dd->fd = fd;
dd->e = NULL;
dd->action = DISK_ADD;
- rv = imsm_read_serial(fd, devname, dd->serial);
+ rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN);
if (rv) {
pr_err("failed to retrieve scsi serial, aborting\n");
if (dd->devname)
--
2.7.5

View File

@ -1,342 +0,0 @@
From 329dfc28debb58ffe7bd1967cea00fc583139aca Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 4 Nov 2019 14:27:49 +1100
Subject: [RHEL8.2 PATCH 54/61] Create: add support for RAID0 layouts.
Since Linux 5.4 a layout is needed for RAID0 arrays with
varying device sizes.
This patch makes the layout of an array visible (via --examine)
and sets the layout on newly created arrays.
--layout=dangerous
can be used to avoid setting a layout so that they array
can be used on older kernels.
Tested-by: dann frazier <dann.frazier@canonical.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 11 +++++++++++
Detail.c | 5 +++++
maps.c | 12 ++++++++++++
md.4 | 14 ++++++++++++++
mdadm.8.in | 30 +++++++++++++++++++++++++++++-
mdadm.c | 8 ++++++++
mdadm.h | 8 +++++++-
super0.c | 6 ++++++
super1.c | 30 +++++++++++++++++++++++++++++-
9 files changed, 121 insertions(+), 3 deletions(-)
diff --git a/Create.c b/Create.c
index 292f92a..6f84e5b 100644
--- a/Create.c
+++ b/Create.c
@@ -51,6 +51,9 @@ static int default_layout(struct supertype *st, int level, int verbose)
default: /* no layout */
layout = 0;
break;
+ case 0:
+ layout = RAID0_ORIG_LAYOUT;
+ break;
case 10:
layout = 0x102; /* near=2, far=1 */
if (verbose > 0)
@@ -950,6 +953,11 @@ int Create(struct supertype *st, char *mddev,
if (rv) {
pr_err("ADD_NEW_DISK for %s failed: %s\n",
dv->devname, strerror(errno));
+ if (errno == EINVAL &&
+ info.array.level == 0) {
+ pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+ pr_err("Either upgrade, or use --layout=dangerous\n");
+ }
goto abort_locked;
}
break;
@@ -1046,6 +1054,9 @@ int Create(struct supertype *st, char *mddev,
if (ioctl(mdfd, RUN_ARRAY, &param)) {
pr_err("RUN_ARRAY failed: %s\n",
strerror(errno));
+ if (errno == 524 /* ENOTSUP */ &&
+ info.array.level == 0)
+ cont_err("Please use --layout=original or --layout=alternate\n");
if (info.array.chunk_size & (info.array.chunk_size-1)) {
cont_err("Problem may be that chunk size is not a power of 2\n");
}
diff --git a/Detail.c b/Detail.c
index 24fa462..832485f 100644
--- a/Detail.c
+++ b/Detail.c
@@ -525,6 +525,11 @@ int Detail(char *dev, struct context *c)
printf(" Layout : %s\n",
str ? str : "-unknown-");
}
+ if (array.level == 0 && array.layout) {
+ str = map_num(r0layout, array.layout);
+ printf(" Layout : %s\n",
+ str ? str : "-unknown-");
+ }
if (array.level == 6) {
str = map_num(r6layout, array.layout);
printf(" Layout : %s\n",
diff --git a/maps.c b/maps.c
index 49b7f2c..a4fd279 100644
--- a/maps.c
+++ b/maps.c
@@ -73,6 +73,18 @@ mapping_t r6layout[] = {
{ NULL, UnSet }
};
+/* raid0 layout is only needed because of a bug in 3.14 which changed
+ * the effective layout of raid0 arrays with varying device sizes.
+ */
+mapping_t r0layout[] = {
+ { "original", RAID0_ORIG_LAYOUT},
+ { "alternate", RAID0_ALT_MULTIZONE_LAYOUT},
+ { "1", 1}, /* aka ORIG */
+ { "2", 2}, /* aka ALT */
+ { "dangerous", 0},
+ { NULL, UnSet},
+};
+
mapping_t pers[] = {
{ "linear", LEVEL_LINEAR},
{ "raid0", 0},
diff --git a/md.4 b/md.4
index e86707a..6fe2755 100644
--- a/md.4
+++ b/md.4
@@ -193,6 +193,20 @@ smallest device has been exhausted, the RAID0 driver starts
collecting chunks into smaller stripes that only span the drives which
still have remaining space.
+A bug was introduced in linux 3.14 which changed the layout of blocks in
+a RAID0 beyond the region that is striped over all devices. This bug
+does not affect an array with all devices the same size, but can affect
+other RAID0 arrays.
+
+Linux 5.4 (and some stable kernels to which the change was backported)
+will not normally assemble such an array as it cannot know which layout
+to use. There is a module parameter "raid0.default_layout" which can be
+set to "1" to force the kernel to use the pre-3.14 layout or to "2" to
+force it to use the 3.14-and-later layout. when creating a new RAID0
+array,
+.I mdadm
+will record the chosen layout in the metadata in a way that allows newer
+kernels to assemble the array without needing a module parameter.
.SS RAID1
diff --git a/mdadm.8.in b/mdadm.8.in
index 9aec9f4..fc9b6a6 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -593,6 +593,8 @@ to change the RAID level in some cases. See LEVEL CHANGES below.
This option configures the fine details of data layout for RAID5, RAID6,
and RAID10 arrays, and controls the failure modes for
.IR faulty .
+It can also be used for working around a kernel bug with RAID0, but generally
+doesn't need to be used explicitly.
The layout of the RAID5 parity block can be one of
.BR left\-asymmetric ,
@@ -652,7 +654,7 @@ option to set subsequent failure modes.
"clear" or "none" will remove any pending or periodic failure modes,
and "flush" will clear any persistent faults.
-Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
+The layout options for RAID10 are one of 'n', 'o' or 'f' followed
by a small number. The default is 'n2'. The supported options are:
.I 'n'
@@ -677,6 +679,32 @@ devices in the array. It does not need to divide evenly into that
number (e.g. it is perfectly legal to have an 'n2' layout for an array
with an odd number of devices).
+A bug introduced in Linux 3.14 means that RAID0 arrays
+.B "with devices of differing sizes"
+started using a different layout. This could lead to
+data corruption. Since Linux 5.4 (and various stable releases that received
+backports), the kernel will not accept such an array unless
+a layout is explictly set. It can be set to
+.RB ' original '
+or
+.RB ' alternate '.
+When creating a new array,
+.I mdadm
+will select
+.RB ' original '
+by default, so the layout does not normally need to be set.
+An array created for either
+.RB ' original '
+or
+.RB ' alternate '
+will not be recognized by an (unpatched) kernel prior to 5.4. To create
+a RAID0 array with devices of differing sizes that can be used on an
+older kernel, you can set the layout to
+.RB ' dangerous '.
+This will use whichever layout the running kernel supports, so the data
+on the array may become corrupt when changing kernel from pre-3.14 to a
+later kernel.
+
When an array is converted between RAID5 and RAID6 an intermediate
RAID6 layout is used in which the second parity block (Q) is always on
the last device. To convert a RAID5 to RAID6 and leave it in this new
diff --git a/mdadm.c b/mdadm.c
index 1fb8086..e438f9c 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -550,6 +550,14 @@ int main(int argc, char *argv[])
pr_err("raid level must be given before layout.\n");
exit(2);
+ case 0:
+ s.layout = map_name(r0layout, optarg);
+ if (s.layout == UnSet) {
+ pr_err("layout %s not understood for raid0.\n",
+ optarg);
+ exit(2);
+ }
+ break;
case 5:
s.layout = map_name(r5layout, optarg);
if (s.layout == UnSet) {
diff --git a/mdadm.h b/mdadm.h
index 91f1338..9e98778 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -763,7 +763,8 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+extern mapping_t r0layout[], r5layout[], r6layout[],
+ pers[], modes[], faultylayout[];
extern mapping_t consistency_policies[], sysfs_array_states[];
extern char *map_dev_preferred(int major, int minor, int create,
@@ -1758,6 +1759,11 @@ char *xstrdup(const char *str);
#define makedev(M,m) (((M)<<8) | (m))
#endif
+enum r0layout {
+ RAID0_ORIG_LAYOUT = 1,
+ RAID0_ALT_MULTIZONE_LAYOUT = 2,
+};
+
/* for raid4/5/6 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
diff --git a/super0.c b/super0.c
index 6b7c0e3..6af140b 100644
--- a/super0.c
+++ b/super0.c
@@ -1291,6 +1291,12 @@ static int validate_geometry0(struct supertype *st, int level,
if (*chunk == UnSet)
*chunk = DEFAULT_CHUNK;
+ if (level == 0 && layout != UnSet) {
+ if (verbose)
+ pr_err("0.90 metadata does not support layouts for RAID0\n");
+ return 0;
+ }
+
if (!subdev)
return 1;
diff --git a/super1.c b/super1.c
index 929466d..cedbb53 100644
--- a/super1.c
+++ b/super1.c
@@ -43,7 +43,7 @@ struct mdp_superblock_1 {
__u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
__u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
- __u32 layout; /* only for raid5 currently */
+ __u32 layout; /* used for raid5, raid6, raid10, and raid0 */
__u64 size; /* used size of component devices, in 512byte sectors */
__u32 chunksize; /* in 512byte sectors */
@@ -144,6 +144,7 @@ struct misc_dev_info {
#define MD_FEATURE_JOURNAL 512 /* support write journal */
#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_MUTLIPLE_PPLS 2048 /* support for multiple PPLs */
+#define MD_FEATURE_RAID0_LAYOUT 4096 /* layout is meaningful in RAID0 */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -155,6 +156,7 @@ struct misc_dev_info {
|MD_FEATURE_JOURNAL \
|MD_FEATURE_PPL \
|MD_FEATURE_MULTIPLE_PPLS \
+ |MD_FEATURE_RAID0_LAYOUT \
)
static int role_from_sb(struct mdp_superblock_1 *sb)
@@ -498,6 +500,11 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Events : %llu\n",
(unsigned long long)__le64_to_cpu(sb->events));
printf("\n");
+ if (__le32_to_cpu(sb->level) == 0 &&
+ (sb->feature_map & __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT))) {
+ c = map_num(r0layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 5) {
c = map_num(r5layout, __le32_to_cpu(sb->layout));
printf(" Layout : %s\n", c?c:"-unknown-");
@@ -1646,6 +1653,7 @@ struct devinfo {
int fd;
char *devname;
long long data_offset;
+ unsigned long long dev_size;
mdu_disk_info_t disk;
struct devinfo *next;
};
@@ -1687,6 +1695,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
di->devname = devname;
di->disk = *dk;
di->data_offset = data_offset;
+ get_dev_size(fd, NULL, &di->dev_size);
di->next = NULL;
*dip = di;
@@ -1888,10 +1897,25 @@ static int write_init_super1(struct supertype *st)
unsigned long long sb_offset;
unsigned long long data_offset;
long bm_offset;
+ int raid0_need_layout = 0;
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
+ if (sb->level == 0 && sb->layout != 0) {
+ struct devinfo *di2 = st->info;
+ unsigned long long s1, s2;
+ s1 = di->dev_size;
+ if (di->data_offset != INVALID_SECTORS)
+ s1 -= di->data_offset;
+ s1 /= __le32_to_cpu(sb->chunksize);
+ s2 = di2->dev_size;
+ if (di2->data_offset != INVALID_SECTORS)
+ s2 -= di2->data_offset;
+ s2 /= __le32_to_cpu(sb->chunksize);
+ if (s1 != s2)
+ raid0_need_layout = 1;
+ }
}
for (di = st->info; di; di = di->next) {
@@ -2039,6 +2063,10 @@ static int write_init_super1(struct supertype *st)
sb->bblog_offset = 0;
}
+ /* RAID0 needs a layout if devices aren't all the same size */
+ if (raid0_need_layout)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+
sb->sb_csum = calc_sb_1_csum(sb);
rv = store_super1(st, di->fd);
--
2.7.5

View File

@ -1,150 +0,0 @@
From 027c099fd1a31fb3815e592de75d0791a22353b4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 4 Nov 2019 14:27:49 +1100
Subject: [RHEL8.2 PATCH 55/61] Assemble: add support for RAID0 layouts.
If you have a RAID0 array with varying sized devices
on a kernel before 5.4, you cannot assemble it on
5.4 or later without explicitly setting the layout.
This is now possible with
--update=layout-original (For 3.13 and earlier kernels)
or
--update=layout-alternate (for 3.14 and later kernels)
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 8 ++++++++
md.4 | 7 +++++++
mdadm.8.in | 17 +++++++++++++++++
mdadm.c | 4 ++++
super1.c | 12 +++++++++++-
5 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/Assemble.c b/Assemble.c
index b2e6914..6b5a7c8 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1031,6 +1031,11 @@ static int start_array(int mdfd,
pr_err("failed to add %s to %s: %s\n",
devices[j].devname, mddev,
strerror(errno));
+ if (errno == EINVAL && content->array.level == 0 &&
+ content->array.layout != 0) {
+ cont_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+ cont_err("Please upgrade.\n");
+ }
if (i < content->array.raid_disks * 2 ||
i == bestcnt)
okcnt--;
@@ -1220,6 +1225,9 @@ static int start_array(int mdfd,
return 0;
}
pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno));
+ if (errno == 524 /* ENOTSUP */ &&
+ content->array.level == 0 && content->array.layout == 0)
+ cont_err("Please use --update=layout-original or --update=layout-alternate\n");
if (!enough(content->array.level, content->array.raid_disks,
content->array.layout, 1, avail))
diff --git a/md.4 b/md.4
index 6fe2755..0712af2 100644
--- a/md.4
+++ b/md.4
@@ -208,6 +208,13 @@ array,
will record the chosen layout in the metadata in a way that allows newer
kernels to assemble the array without needing a module parameter.
+To assemble an old array on a new kernel without using the module parameter,
+use either the
+.B "--update=layout-original"
+option or the
+.B "--update=layout-alternate"
+option.
+
.SS RAID1
A RAID1 array is also known as a mirrored set (though mirrors tend to
diff --git a/mdadm.8.in b/mdadm.8.in
index fc9b6a6..6b63bb4 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1213,6 +1213,8 @@ argument given to this flag can be one of
.BR no\-bbl ,
.BR ppl ,
.BR no\-ppl ,
+.BR layout\-original ,
+.BR layout\-alternate ,
.BR metadata ,
or
.BR super\-minor .
@@ -1364,6 +1366,21 @@ The
.B no\-ppl
option will disable PPL in the superblock.
+The
+.B layout\-original
+and
+.B layout\-alternate
+options are for RAID0 arrays in use before Linux 5.4. If the array was being
+used with Linux 3.13 or earlier, then to assemble the array on a new kernel,
+.B \-\-update=layout\-original
+must be given. If the array was created and used with a kernel from Linux 3.14 to
+Linux 5.3, then
+.B \-\-update=layout\-alternate
+must be given. This only needs to be given once. Subsequent assembly of the array
+will happen normally.
+For more information, see
+.IR md (4).
+
.TP
.BR \-\-freeze\-reshape
Option is intended to be used in start-up scripts during initrd boot phase.
diff --git a/mdadm.c b/mdadm.c
index e438f9c..256a97e 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -795,6 +795,9 @@ int main(int argc, char *argv[])
continue;
if (strcmp(c.update, "revert-reshape") == 0)
continue;
+ if (strcmp(c.update, "layout-original") == 0 ||
+ strcmp(c.update, "layout-alternate") == 0)
+ continue;
if (strcmp(c.update, "byteorder") == 0) {
if (ss) {
pr_err("must not set metadata type with --update=byteorder.\n");
@@ -825,6 +828,7 @@ int main(int argc, char *argv[])
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
" 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
+ " 'layout-original', 'layout-alternate'\n"
);
exit(outf == stdout ? 0 : 2);
diff --git a/super1.c b/super1.c
index cedbb53..e0d80be 100644
--- a/super1.c
+++ b/super1.c
@@ -1550,7 +1550,17 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->devflags |= FailFast1;
else if (strcmp(update, "nofailfast") == 0)
sb->devflags &= ~FailFast1;
- else
+ else if (strcmp(update, "layout-original") == 0 ||
+ strcmp(update, "layout-alternate") == 0) {
+ if (__le32_to_cpu(sb->level) != 0) {
+ pr_err("%s: %s only supported for RAID0\n",
+ devname?:"", update);
+ rv = -1;
+ } else {
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+ sb->layout = __cpu_to_le32(update[7] == 'o' ? 1 : 2);
+ }
+ } else
rv = -1;
sb->sb_csum = calc_sb_1_csum(sb);
--
2.7.5

View File

@ -1,36 +0,0 @@
From aced6fc9542077a69b00d05bc9cd66c12fc34950 Mon Sep 17 00:00:00 2001
From: dann frazier <dann.frazier@canonical.com>
Date: Mon, 9 Dec 2019 13:54:13 -0700
Subject: [RHEL8.2 PATCH 56/61] Respect $(CROSS_COMPILE) when $(CC) is the
default
Commit 1180ed5 told make to only respect $(CROSS_COMPILE) when $(CC)
was unset. But that will never be the case, as make provides
a default value for $(CC). Change this logic to respect $(CROSS_COMPILE)
when $(CC) is the default. Patch originally by Helmet Grohne.
Fixes: 1180ed5 ("Makefile: make the CC definition conditional")
Signed-off-by: dann frazier <dann.frazier@canonical.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index dfe00b0..a33319a 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,9 @@ ifdef COVERITY
COVERITY_FLAGS=-include coverity-gcc-hack.h
endif
-CC ?= $(CROSS_COMPILE)gcc
+ifeq ($(origin CC),default)
+CC := $(CROSS_COMPILE)gcc
+endif
CXFLAGS ?= -ggdb
CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter
ifdef WARN_UNUSED
--
2.7.5

View File

@ -1,39 +0,0 @@
From 1a87493014050e3bd94000cd36122c3cadf21270 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 10 Dec 2019 12:21:21 +0100
Subject: [RHEL8.2 PATCH 57/61] Change warning message
In commit 039b7225e6 ("md: allow creation of mdNNN arrays via
md_mod/parameters/new_array") support for name like mdNNN
was added. Special warning, when kernel is unable to handle
request, was added in commit 7105228e19
("mdadm/mdopen: create new function create_named_array for
writing to new_array"), but it was not adequate enough,
because in this situation mdadm tries to do it the old way.
This commit changes the warning to be more relevant when
creating a RAID container with a "/dev/mdNNN" name and mdadm
falls back to the old approach.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdopen.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mdopen.c b/mdopen.c
index 98c54e4..245be53 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -120,7 +120,8 @@ int create_named_array(char *devnm)
close(fd);
}
if (fd < 0 || n != (int)strlen(devnm)) {
- pr_err("Fail create %s when using %s\n", devnm, new_array_file);
+ pr_err("Fail to create %s when using %s, fallback to creation via node\n",
+ devnm, new_array_file);
return 0;
}
--
2.7.5

View File

@ -1,52 +0,0 @@
From 1cc3965d48deb0fb3e0657159c608ffb124643c1 Mon Sep 17 00:00:00 2001
From: Xiao Yang <ice_yangxiao@163.com>
Date: Wed, 27 Nov 2019 11:59:24 +0800
Subject: [RHEL8.2 PATCH 48/61] Manage: Remove the legacy code for md driver
prior to 0.90.03
Previous re-add operation only calls ioctl(HOT_ADD_DISK) for array without
metadata(e.g. mdadm -B/--build) when md driver is less than 0.90.02, but
commit 091e8e6 breaks the logic and current re-add operation can call
ioctl(HOT_ADD_DISK) even if md driver is 0.90.03.
This issue is reproduced by 05r1-re-add-nosuper:
------------------------------------------------
++ die 'resync or recovery is happening!'
++ echo -e '\n\tERROR: resync or recovery is happening! \n'
ERROR: resync or recovery is happening!
------------------------------------------------
Fixes: 091e8e6("Manage: Remove all references to md_get_version()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Xiao Yang <ice_yangxiao@163.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/Manage.c b/Manage.c
index 21536f5..ffe55f8 100644
--- a/Manage.c
+++ b/Manage.c
@@ -741,18 +741,6 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
" Adding anyway as --force was given.\n",
dv->devname, devname);
}
- if (!tst->ss->external && array->major_version == 0) {
- if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
- if (verbose >= 0)
- pr_err("hot added %s\n",
- dv->devname);
- return 1;
- }
-
- pr_err("hot add failed for %s: %s\n",
- dv->devname, strerror(errno));
- return -1;
- }
if (array->not_persistent == 0 || tst->ss->external) {
--
2.7.5

View File

@ -1,43 +0,0 @@
From 4431efebabd0dd39f33dc1dd8ada312b8da1c9d8 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Thu, 16 Jan 2020 09:34:44 +0100
Subject: [RHEL8.2 PATCH 59/61] imsm: Update grow manual.
Update --grow option description in manual, according to
the supported grow operations by IMSM.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 6b63bb4..ca02a33 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -481,9 +481,7 @@ still be larger than any replacement.
This value can be set with
.B \-\-grow
for RAID level 1/4/5/6 though
-.B CONTAINER
-based arrays such as those with IMSM metadata may not be able to
-support this.
+DDF arrays may not be able to support this.
If the array was created with a size smaller than the currently
active drives, the extra space can be accessed using
.BR \-\-grow .
@@ -2759,9 +2757,7 @@ container format. The number of devices in a container can be
increased - which affects all arrays in the container - or an array
in a container can be converted between levels where those levels are
supported by the container, and the conversion is on of those listed
-above. Resizing arrays in an IMSM container with
-.B "--grow --size"
-is not yet supported.
+above.
.PP
Notes:
--
2.7.5

View File

@ -1,192 +0,0 @@
From 42e641abeb312a91b841f1b1ea73661e4bd5a31c Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 21 Jan 2020 10:38:52 +0100
Subject: [RHEL8.2 PATCH 60/61] Add support for Tebibytes
Adding support for Tebibytes enables display size of
volumes in Tebibytes and Terabytes when they are
bigger than 2048 GiB (or GB).
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 20 ++++++++++----------
util.c | 47 +++++++++++++++++++++++++++++++++--------------
2 files changed, 43 insertions(+), 24 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index ca02a33..5d00faf 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -467,8 +467,8 @@ If this is not specified
size, though if there is a variance among the drives of greater than 1%, a warning is
issued.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
Sometimes a replacement drive can be a little smaller than the
original drives though this should be minimised by IDEMA standards.
@@ -532,8 +532,8 @@ problems the array can be made bigger again with no loss with another
.B "\-\-grow \-\-array\-size="
command.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
A value of
.B max
restores the apparent size of the array to be whatever the real
@@ -551,8 +551,8 @@ This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
RAID4, RAID5, RAID6, and RAID10 require the chunk size to be a power
of 2. In any case it must be a multiple of 4KB.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
.TP
.BR \-\-rounding=
@@ -767,8 +767,8 @@ When using an
bitmap, the chunksize defaults to 64Meg, or larger if necessary to
fit the bitmap into the available space.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
.TP
.BR \-W ", " \-\-write\-mostly
@@ -857,8 +857,8 @@ an array which was originally created using a different version of
which computed a different offset.
Setting the offset explicitly over-rides the default. The value given
-is in Kilobytes unless a suffix of 'K', 'M' or 'G' is used to explicitly
-indicate Kilobytes, Megabytes or Gigabytes respectively.
+is in Kilobytes unless a suffix of 'K', 'M', 'G' or 'T' is used to explicitly
+indicate Kilobytes, Megabytes, Gigabytes or Terabytes respectively.
Since Linux 3.4,
.B \-\-data\-offset
diff --git a/util.c b/util.c
index 64dd409..07f9dc3 100644
--- a/util.c
+++ b/util.c
@@ -389,7 +389,7 @@ int mdadm_version(char *version)
unsigned long long parse_size(char *size)
{
/* parse 'size' which should be a number optionally
- * followed by 'K', 'M', or 'G'.
+ * followed by 'K', 'M'. 'G' or 'T'.
* Without a suffix, K is assumed.
* Number returned is in sectors (half-K)
* INVALID_SECTORS returned on error.
@@ -411,6 +411,10 @@ unsigned long long parse_size(char *size)
c++;
s *= 1024 * 1024 * 2;
break;
+ case 'T':
+ c++;
+ s *= 1024 * 1024 * 1024 * 2LL;
+ break;
case 's': /* sectors */
c++;
break;
@@ -893,13 +897,14 @@ char *human_size(long long bytes)
{
static char buf[47];
- /* We convert bytes to either centi-M{ega,ibi}bytes or
- * centi-G{igi,ibi}bytes, with appropriate rounding,
- * and then print 1/100th of those as a decimal.
+ /* We convert bytes to either centi-M{ega,ibi}bytes,
+ * centi-G{igi,ibi}bytes or centi-T{era,ebi}bytes
+ * with appropriate rounding, and then print
+ * 1/100th of those as a decimal.
* We allow upto 2048Megabytes before converting to
- * gigabytes, as that shows more precision and isn't
+ * gigabytes and 2048Gigabytes before converting to
+ * terabytes, as that shows more precision and isn't
* too large a number.
- * Terabytes are not yet handled.
*/
if (bytes < 5000*1024)
@@ -909,11 +914,16 @@ char *human_size(long long bytes)
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
cMiB/100, cMiB % 100, cMB/100, cMB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
cGiB/100, cGiB % 100, cGB/100, cGB % 100);
+ } else {
+ long cTiB = (bytes * 200LL / (1LL<<40) + 1) / 2;
+ long cTB = (bytes / (1000000000000LL / 200LL) + 1) / 2;
+ snprintf(buf, sizeof(buf), " (%ld.%02ld TiB %ld.%02ld TB)",
+ cTiB/100, cTiB % 100, cTB/100, cTB % 100);
}
return buf;
}
@@ -922,13 +932,14 @@ char *human_size_brief(long long bytes, int prefix)
{
static char buf[30];
- /* We convert bytes to either centi-M{ega,ibi}bytes or
- * centi-G{igi,ibi}bytes, with appropriate rounding,
- * and then print 1/100th of those as a decimal.
+ /* We convert bytes to either centi-M{ega,ibi}bytes,
+ * centi-G{igi,ibi}bytes or centi-T{era,ebi}bytes
+ * with appropriate rounding, and then print
+ * 1/100th of those as a decimal.
* We allow upto 2048Megabytes before converting to
- * gigabytes, as that shows more precision and isn't
+ * gigabytes and 2048Gigabytes before converting to
+ * terabytes, as that shows more precision and isn't
* too large a number.
- * Terabytes are not yet handled.
*
* If prefix == IEC, we mean prefixes like kibi,mebi,gibi etc.
* If prefix == JEDEC, we mean prefixes like kilo,mega,giga etc.
@@ -941,10 +952,14 @@ char *human_size_brief(long long bytes, int prefix)
long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
cMiB/100, cMiB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
cGiB/100, cGiB % 100);
+ } else {
+ long cTiB = (bytes * 200LL / (1LL<<40) + 1) / 2;
+ snprintf(buf, sizeof(buf), "%ld.%02ldTiB",
+ cTiB/100, cTiB % 100);
}
}
else if (prefix == JEDEC) {
@@ -952,10 +967,14 @@ char *human_size_brief(long long bytes, int prefix)
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMB",
cMB/100, cMB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGB",
cGB/100, cGB % 100);
+ } else {
+ long cTB = (bytes / (1000000000000LL / 200LL) + 1) / 2;
+ snprintf(buf, sizeof(buf), "%ld.%02ldTB",
+ cTB/100, cTB % 100);
}
}
else
--
2.7.5

View File

@ -1,65 +0,0 @@
From 1e93d0d15913c3fa6d0de5af3fb5e4e3b3f068da Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 17 Jan 2020 15:24:04 +0100
Subject: [RHEL8.2 PATCH 61/61] imsm: fill working_disks according to metadata.
Imsm tracks as "working_disk" each visible drive.
Assemble routine expects that the value will return count
of active member drives recorded in metadata.
As a side effect "--no-degraded" doesn't work correctly for imsm.
Align this field to others.
Added check, if the option --no-degraded is called with --scan.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.c | 9 ++++++---
super-intel.c | 5 ++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/mdadm.c b/mdadm.c
index 256a97e..13dc24e 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1485,9 +1485,12 @@ int main(int argc, char *argv[])
rv = Manage_stop(devlist->devname, mdfd, c.verbose, 0);
break;
case ASSEMBLE:
- if (devs_found == 1 && ident.uuid_set == 0 &&
- ident.super_minor == UnSet && ident.name[0] == 0 &&
- !c.scan ) {
+ if (!c.scan && c.runstop == -1) {
+ pr_err("--no-degraded not meaningful without a --scan assembly.\n");
+ exit(1);
+ } else if (devs_found == 1 && ident.uuid_set == 0 &&
+ ident.super_minor == UnSet && ident.name[0] == 0 &&
+ !c.scan) {
/* Only a device has been given, so get details from config file */
struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
if (array_ident == NULL) {
diff --git a/super-intel.c b/super-intel.c
index 5c1f759..47809bc 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7946,7 +7946,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
skip = 1;
if (!skip && (ord & IMSM_ORD_REBUILD))
recovery_start = 0;
-
+ if (!(ord & IMSM_ORD_REBUILD))
+ this->array.working_disks++;
/*
* if we skip some disks the array will be assmebled degraded;
* reset resync start to avoid a dirty-degraded
@@ -7988,8 +7989,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
else
this->array.spare_disks++;
}
- if (info_d->recovery_start == MaxSector)
- this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = pba_of_lba0(map);
--
2.7.5

View File

@ -1,55 +0,0 @@
From 2551061c253b8fd45ee93d1aab3e91d2c7ac9c20 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 24 Feb 2020 12:34:09 +0100
Subject: [RHEL7.9 PATCH 62/77] mdadm.8: add note information for raid0 growing
operation
When growing a raid0 device, if the new component disk size is not
big enough, the grow operation may fail due to lack of backup space.
The minimum backup space should be larger than:
LCM(old, new) * chunk-size * 2
where LCM() is the least common multiple of the old and new count of
component disks, and "* 2" comes from the fact that mdadm refuses to
use more than half of a spare device for backup space.
There are users reporting such failure when they grew a raid0 array
with small component disk. Neil Brown points out this is not a bug
and how the failure comes. This patch adds note information into
mdadm(8) man page in the Notes part of GROW MODE section to explain
the minimum size requirement of new component disk size or external
backup size.
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Cc: NeilBrown <neilb@suse.de>
Cc: Jes Sorensen <jsorensen@fb.com>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
Cc: Wols Lists <antlists@youngman.org.uk>
Cc: Nix <nix@esperi.org.uk>
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/mdadm.8.in b/mdadm.8.in
index 5d00faf..a3494a1 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -2768,6 +2768,12 @@ option and it is transparent for assembly feature.
.IP \(bu 4
Roaming between Windows(R) and Linux systems for IMSM metadata is not
supported during grow process.
+.IP \(bu 4
+When growing a raid0 device, the new component disk size (or external
+backup size) should be larger than LCM(old, new) * chunk-size * 2,
+where LCM() is the least common multiple of the old and new count of
+component disks, and "* 2" comes from the fact that mdadm refuses to
+use more than half of a spare device for backup space.
.SS SIZE CHANGES
Normally when an array is built the "size" is taken from the smallest
--
2.7.5

View File

@ -1,59 +0,0 @@
From fd38b8ea80ff8e0317e12d1d70431148ceedd5fd Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Tue, 11 Feb 2020 21:44:15 +0800
Subject: [RHEL7.9 PATCH 63/77] Remove the legacy whitespace
The whitespace between Environment= and the true value causes confusion.
To avoid confusing other people in future, remove the whitespace to keep
it a simple, unambiguous syntax
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.service | 2 +-
systemd/mdcheck_start.service | 2 +-
systemd/mdmonitor-oneshot.service | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index aa02dde..854317f 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,7 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= "MDADM_CHECK_DURATION=6 hours"
+Environment="MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index da62d5f..3bb3d13 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= "MDADM_CHECK_DURATION=6 hours"
+Environment="MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdmonitor-oneshot.service b/systemd/mdmonitor-oneshot.service
index fd469b1..373955a 100644
--- a/systemd/mdmonitor-oneshot.service
+++ b/systemd/mdmonitor-oneshot.service
@@ -9,7 +9,7 @@
Description=Reminder for degraded MD arrays
[Service]
-Environment= MDADM_MONITOR_ARGS=--scan
+Environment=MDADM_MONITOR_ARGS=--scan
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=BINDIR/mdadm --monitor --oneshot $MDADM_MONITOR_ARGS
--
2.7.5

View File

@ -1,91 +0,0 @@
From 3364781b929f571a3dc3a6afed09eb1b03ce607c Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Wed, 19 Feb 2020 10:54:49 +0100
Subject: [RHEL7.9 PATCH 64/77] imsm: pass subarray id to kill_subarray
function
After patch b6180160f ("imsm: save current_vol number")
current_vol for imsm is not set and kill_subarray()
cannot determine which volume has to be deleted.
Volume has to be passed as "subarray_id".
The parameter affects only IMSM metadata.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Kill.c | 2 +-
mdadm.h | 3 ++-
super-ddf.c | 2 +-
super-intel.c | 9 ++++-----
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/Kill.c b/Kill.c
index d4767e2..bfd0efd 100644
--- a/Kill.c
+++ b/Kill.c
@@ -119,7 +119,7 @@ int Kill_subarray(char *dev, char *subarray, int verbose)
st->update_tail = &st->updates;
/* ok we've found our victim, drop the axe */
- rv = st->ss->kill_subarray(st);
+ rv = st->ss->kill_subarray(st, subarray);
if (rv) {
if (verbose >= 0)
pr_err("Failed to delete subarray-%s from %s\n",
diff --git a/mdadm.h b/mdadm.h
index 9e98778..d94569f 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1038,7 +1038,8 @@ extern struct superswitch {
/* query the supertype for default geometry */
void (*default_geometry)(struct supertype *st, int *level, int *layout, int *chunk); /* optional */
/* Permit subarray's to be deleted from inactive containers */
- int (*kill_subarray)(struct supertype *st); /* optional */
+ int (*kill_subarray)(struct supertype *st,
+ char *subarray_id); /* optional */
/* Permit subarray's to be modified */
int (*update_subarray)(struct supertype *st, char *subarray,
char *update, struct mddev_ident *ident); /* optional */
diff --git a/super-ddf.c b/super-ddf.c
index 7802063..7cd5702 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -4446,7 +4446,7 @@ static int _kill_subarray_ddf(struct ddf_super *ddf, const char *guid)
return 0;
}
-static int kill_subarray_ddf(struct supertype *st)
+static int kill_subarray_ddf(struct supertype *st, char *subarray_id)
{
struct ddf_super *ddf = st->sb;
/*
diff --git a/super-intel.c b/super-intel.c
index 47809bc..e4d2122 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7600,18 +7600,17 @@ static void default_geometry_imsm(struct supertype *st, int *level, int *layout,
static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
-static int kill_subarray_imsm(struct supertype *st)
+static int kill_subarray_imsm(struct supertype *st, char *subarray_id)
{
- /* remove the subarray currently referenced by ->current_vol */
+ /* remove the subarray currently referenced by subarray_id */
__u8 i;
struct intel_dev **dp;
struct intel_super *super = st->sb;
- __u8 current_vol = super->current_vol;
+ __u8 current_vol = strtoul(subarray_id, NULL, 10);
struct imsm_super *mpb = super->anchor;
- if (super->current_vol < 0)
+ if (mpb->num_raid_devs == 0)
return 2;
- super->current_vol = -1; /* invalidate subarray cursor */
/* block deletions that would change the uuid of active subarrays
*
--
2.7.5

View File

@ -1,91 +0,0 @@
From 45c43276d02a32876c7e1f9f0d04580595141b3d Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Wed, 19 Feb 2020 11:13:17 +0100
Subject: [RHEL7.9 PATCH 65/77] imsm: Remove --dump/--restore implementation
Functionalities --dump and --restore are not supported.
Remove dead code from imsm.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 56 --------------------------------------------------------
1 file changed, 56 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index e4d2122..c9a1af5 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2128,61 +2128,6 @@ static void export_examine_super_imsm(struct supertype *st)
printf("MD_DEVICES=%u\n", mpb->num_disks);
}
-static int copy_metadata_imsm(struct supertype *st, int from, int to)
-{
- /* The second last sector of the device contains
- * the "struct imsm_super" metadata.
- * This contains mpb_size which is the size in bytes of the
- * extended metadata. This is located immediately before
- * the imsm_super.
- * We want to read all that, plus the last sector which
- * may contain a migration record, and write it all
- * to the target.
- */
- void *buf;
- unsigned long long dsize, offset;
- int sectors;
- struct imsm_super *sb;
- struct intel_super *super = st->sb;
- unsigned int sector_size = super->sector_size;
- unsigned int written = 0;
-
- if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
- return 1;
-
- if (!get_dev_size(from, NULL, &dsize))
- goto err;
-
- if (lseek64(from, dsize-(2*sector_size), 0) < 0)
- goto err;
- if ((unsigned int)read(from, buf, sector_size) != sector_size)
- goto err;
- sb = buf;
- if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
- goto err;
-
- sectors = mpb_sectors(sb, sector_size) + 2;
- offset = dsize - sectors * sector_size;
- if (lseek64(from, offset, 0) < 0 ||
- lseek64(to, offset, 0) < 0)
- goto err;
- while (written < sectors * sector_size) {
- int n = sectors*sector_size - written;
- if (n > 4096)
- n = 4096;
- if (read(from, buf, n) != n)
- goto err;
- if (write(to, buf, n) != n)
- goto err;
- written += n;
- }
- free(buf);
- return 0;
-err:
- free(buf);
- return 1;
-}
-
static void detail_super_imsm(struct supertype *st, char *homehost,
char *subarray)
{
@@ -12270,7 +12215,6 @@ struct superswitch super_imsm = {
.reshape_super = imsm_reshape_super,
.manage_reshape = imsm_manage_reshape,
.recover_backup = recover_backup_imsm,
- .copy_metadata = copy_metadata_imsm,
.examine_badblocks = examine_badblocks_imsm,
.match_home = match_home_imsm,
.uuid_from_super= uuid_from_super_imsm,
--
2.7.5

View File

@ -1,32 +0,0 @@
From 06a6101c0a4d2658798dc42f461ace8e6900f840 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Wed, 11 Mar 2020 15:40:13 +0100
Subject: [RHEL7.9 PATCH 66/77] imsm: Correct minimal device size.
Check if the given size of a member drive is not less than 1 MebiByte.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/super-intel.c b/super-intel.c
index c9a1af5..6680df2 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7425,7 +7425,10 @@ static int validate_geometry_imsm(struct supertype *st, int level, int layout,
verbose);
}
- if (size && (size < 1024)) {
+ /*
+ * Size is given in sectors.
+ */
+ if (size && (size < 2048)) {
pr_err("Given size must be greater than 1M.\n");
/* Depends on algorithm in Create.c :
* if container was given (dev == NULL) return -1,
--
2.7.5

View File

@ -1,30 +0,0 @@
From 9e4494051de3f53228fabae56c116879bff5a0c8 Mon Sep 17 00:00:00 2001
From: Lidong Zhong <lidong.zhong@suse.com>
Date: Mon, 16 Mar 2020 10:16:49 +0800
Subject: [RHEL7.9 PATCH 67/77] Detail: show correct bitmap info for cluster
raid device
Signed-off-by: Lidong Zhong <lidong.zhong@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Detail.c b/Detail.c
index 832485f..daec4f1 100644
--- a/Detail.c
+++ b/Detail.c
@@ -468,7 +468,9 @@ int Detail(char *dev, struct context *c)
if (ioctl(fd, GET_BITMAP_FILE, &bmf) == 0 && bmf.pathname[0]) {
printf(" Intent Bitmap : %s\n", bmf.pathname);
printf("\n");
- } else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
+ } else if (array.state & (1<<MD_SB_CLUSTERED))
+ printf(" Intent Bitmap : Internal(Clustered)\n\n");
+ else if (array.state & (1<<MD_SB_BITMAP_PRESENT))
printf(" Intent Bitmap : Internal\n\n");
atime = array.utime;
if (atime)
--
2.7.5

View File

@ -1,67 +0,0 @@
From e48aed3c81a75fa3f761fb5b84e5d16f2baee709 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 17 Mar 2020 10:20:12 +0100
Subject: [RHEL7.9 PATCH 68/77] imsm: support the Array Creation Time field in
metadata
Also present its value in --examine and --examine --export.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 6680df2..8840fff 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -260,8 +260,9 @@ struct imsm_super {
* (starts at 1)
*/
__u16 filler1; /* 0x4E - 0x4F */
-#define IMSM_FILLERS 34
- __u32 filler[IMSM_FILLERS]; /* 0x50 - 0xD7 RAID_MPB_FILLERS */
+ __u64 creation_time; /* 0x50 - 0x57 Array creation time */
+#define IMSM_FILLERS 32
+ __u32 filler[IMSM_FILLERS]; /* 0x58 - 0xD7 RAID_MPB_FILLERS */
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
@@ -2014,6 +2015,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
__u32 sum;
__u32 reserved = imsm_reserved_sectors(super, super->disks);
struct dl *dl;
+ time_t creation_time;
strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
str[MPB_SIG_LEN-1] = '\0';
@@ -2022,6 +2024,9 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
+ creation_time = __le64_to_cpu(mpb->creation_time);
+ printf(" Creation Time : %.24s\n",
+ creation_time ? ctime(&creation_time) : "Unknown");
printf(" Attributes : ");
if (imsm_check_attributes(mpb->attributes))
printf("All supported\n");
@@ -2126,6 +2131,7 @@ static void export_examine_super_imsm(struct supertype *st)
printf("MD_LEVEL=container\n");
printf("MD_UUID=%s\n", nbuf+5);
printf("MD_DEVICES=%u\n", mpb->num_disks);
+ printf("MD_CREATION_TIME=%llu\n", __le64_to_cpu(mpb->creation_time));
}
static void detail_super_imsm(struct supertype *st, char *homehost,
@@ -5762,6 +5768,7 @@ static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
sum += __gen_imsm_checksum(mpb);
mpb->family_num = __cpu_to_le32(sum);
mpb->orig_family_num = mpb->family_num;
+ mpb->creation_time = __cpu_to_le64((__u64)time(NULL));
}
super->current_disk = dl;
return 0;
--
2.7.5

View File

@ -1,39 +0,0 @@
From ba1b3bc80ea555c288f1119e69d9273249967081 Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 17 Mar 2020 10:21:03 +0100
Subject: [RHEL7.9 PATCH 69/77] imsm: show Subarray and Volume ID in --examine
output
Show the index of the subarray as 'Subarray' and the value of the
my_vol_raid_dev_num field as 'Volume ID'.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index 8840fff..562a58c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1579,6 +1579,7 @@ static void print_imsm_dev(struct intel_super *super,
printf("\n");
printf("[%.16s]:\n", dev->volume);
+ printf(" Subarray : %d\n", super->current_vol);
printf(" UUID : %s\n", uuid);
printf(" RAID Level : %d", get_imsm_raid_level(map));
if (map2)
@@ -1683,6 +1684,8 @@ static void print_imsm_dev(struct intel_super *super,
printf("Multiple PPLs on journaling drive\n");
else
printf("<unknown:%d>\n", dev->rwh_policy);
+
+ printf(" Volume ID : %u\n", dev->my_vol_raid_dev_num);
}
static void print_imsm_disk(struct imsm_disk *disk,
--
2.7.5

View File

@ -1,35 +0,0 @@
From e1b92ee0de26576a33b20c9dd6ef6bd8cab8e283 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Wed, 8 Apr 2020 16:44:52 +0200
Subject: [RHEL7.9 PATCH 70/77] udev: Ignore change event for imsm
When adding a device to a container mdadm has to close its file
descriptor before sysfs_add_disk(). This generates change event.
There is a possible race because the metadata is already written and
another -I process can place the drive differently. As a result, the
device can be added to two containers simultaneously.
From IMSM perspective there is no need to react for change event. IMSM
doesn't support stacked devices.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-assembly.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-assembly.rules b/udev-md-raid-assembly.rules
index 9f055ed..d668cdd 100644
--- a/udev-md-raid-assembly.rules
+++ b/udev-md-raid-assembly.rules
@@ -23,7 +23,7 @@ IMPORT{cmdline}="nodmraid"
ENV{nodmraid}=="?*", GOTO="md_inc_end"
ENV{ID_FS_TYPE}=="ddf_raid_member", GOTO="md_inc"
ENV{noiswmd}=="?*", GOTO="md_inc_end"
-ENV{ID_FS_TYPE}=="isw_raid_member", GOTO="md_inc"
+ENV{ID_FS_TYPE}=="isw_raid_member", ACTION!="change", GOTO="md_inc"
GOTO="md_inc_end"
LABEL="md_inc"
--
2.7.5

View File

@ -1,109 +0,0 @@
From 185ec4397e61ad00dd68c841e15eaa8629eb9514 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 11 Apr 2020 00:24:46 +0800
Subject: [RHEL7.9 PATCH 71/77] Monitor: improve check_one_sharer() for
checking duplicated process
When running mdadm monitor with scan mode, only one autorebuild process
is allowed. check_one_sharer() checks duplicated process by following
steps,
1) Read autorebuild.pid file,
- if file does not exist, no duplicated process, go to 3).
- if file exists, continue to next step.
2) Read pid number from autorebuild.pid file, then check procfs pid
directory /proc/<PID>,
- if the directory does not exist, no duplicated process, go to 3)
- if the directory exists, print error message for duplicated process
and exit this mdadm.
3) Write current pid into autorebuild.pid file, continue to monitor in
scan mode.
The problem for the above step 2) is, if after system reboots and
another different process happens to have exact same pid number which
autorebuild.pid file records, check_one_sharer() will treat it as a
duplicated mdadm process and returns error with message "Only one
autorebuild process allowed in scan mode, aborting".
This patch tries to fix the above same-pid-but-different-process issue
by one more step to check the process command name,
1) Read autorebuild.pid file
- if file does not exist, no duplicated process, go to 4).
- if file exists, continue to next step.
2) Read pid number from autorebuild.pid file, then check procfs file
comm with the specific pid directory /proc/<PID>/comm
- if the file does not exist, it means the directory /proc/<PID> does
not exist, go to 4)
- if the file exists, continue to next step
3) Read process command name from /proc/<PID>/comm, compare the command
name with "mdadm" process name,
- if not equal, no duplicated process, goto 4)
- if strings are equal, print error message for duplicated process
and exit this mdadm.
4) Write current pid into autorebuild.pid file, continue to monitor in
scan mode.
Now check_one_sharer() returns error for duplicated process only when
the recorded pid from autorebuild.pid exists, and the process has exact
same command name as "mdadm".
Reported-by: Shinkichi Yamazaki <shinkichi.yamazaki@suse.com>
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index b527165..2d6b3b9 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -301,26 +301,34 @@ static int make_daemon(char *pidfile)
static int check_one_sharer(int scan)
{
- int pid, rv;
+ int pid;
+ FILE *comm_fp;
FILE *fp;
- char dir[20];
+ char comm_path[100];
char path[100];
- struct stat buf;
+ char comm[20];
+
sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
fp = fopen(path, "r");
if (fp) {
if (fscanf(fp, "%d", &pid) != 1)
pid = -1;
- sprintf(dir, "/proc/%d", pid);
- rv = stat(dir, &buf);
- if (rv != -1) {
- if (scan) {
- pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
- fclose(fp);
- return 1;
- } else {
- pr_err("Warning: One autorebuild process already running.\n");
+ snprintf(comm_path, sizeof(comm_path),
+ "/proc/%d/comm", pid);
+ comm_fp = fopen(comm_path, "r");
+ if (comm_fp) {
+ if (fscanf(comm_fp, "%s", comm) &&
+ strncmp(basename(comm), Name, strlen(Name)) == 0) {
+ if (scan) {
+ pr_err("Only one autorebuild process allowed in scan mode, aborting\n");
+ fclose(comm_fp);
+ fclose(fp);
+ return 1;
+ } else {
+ pr_err("Warning: One autorebuild process already running.\n");
+ }
}
+ fclose(comm_fp);
}
fclose(fp);
}
--
2.7.5

View File

@ -1,85 +0,0 @@
From 1c294b5d960abeeb9e0f188af294d019bc82b20e Mon Sep 17 00:00:00 2001
From: Lidong Zhong <lidong.zhong@suse.com>
Date: Tue, 14 Apr 2020 16:19:41 +0800
Subject: [RHEL7.9 PATCH 72/77] Detail: adding sync status for cluster device
On the node with /proc/mdstat is
Personalities : [raid1]
md0 : active raid1 sdb[4] sdc[3] sdd[2]
1046528 blocks super 1.2 [3/2] [UU_]
recover=REMOTE
bitmap: 1/1 pages [4KB], 65536KB chunk
Let's change the 'State' of 'mdadm -Q -D' accordingly
State : clean, degraded
With this patch, it will be
State : clean, degraded, recovering (REMOTE)
Signed-off-by: Lidong Zhong <lidong.zhong@suse.com>
Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 9 ++++++---
mdadm.h | 3 ++-
mdstat.c | 2 ++
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/Detail.c b/Detail.c
index daec4f1..24eeba0 100644
--- a/Detail.c
+++ b/Detail.c
@@ -498,17 +498,20 @@ int Detail(char *dev, struct context *c)
} else
arrayst = "active";
- printf(" State : %s%s%s%s%s%s \n",
+ printf(" State : %s%s%s%s%s%s%s \n",
arrayst, st,
(!e || (e->percent < 0 &&
e->percent != RESYNC_PENDING &&
- e->percent != RESYNC_DELAYED)) ?
+ e->percent != RESYNC_DELAYED &&
+ e->percent != RESYNC_REMOTE)) ?
"" : sync_action[e->resync],
larray_size ? "": ", Not Started",
(e && e->percent == RESYNC_DELAYED) ?
" (DELAYED)": "",
(e && e->percent == RESYNC_PENDING) ?
- " (PENDING)": "");
+ " (PENDING)": "",
+ (e && e->percent == RESYNC_REMOTE) ?
+ " (REMOTE)": "");
} else if (inactive && !is_container) {
printf(" State : inactive\n");
}
diff --git a/mdadm.h b/mdadm.h
index d94569f..399478b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1815,7 +1815,8 @@ enum r0layout {
#define RESYNC_NONE -1
#define RESYNC_DELAYED -2
#define RESYNC_PENDING -3
-#define RESYNC_UNKNOWN -4
+#define RESYNC_REMOTE -4
+#define RESYNC_UNKNOWN -5
/* When using "GET_DISK_INFO" it isn't certain how high
* we need to check. So we impose an absolute limit of
diff --git a/mdstat.c b/mdstat.c
index 7e600d0..20577a3 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -257,6 +257,8 @@ struct mdstat_ent *mdstat_read(int hold, int start)
ent->percent = RESYNC_DELAYED;
if (l > 8 && strcmp(w+l-8, "=PENDING") == 0)
ent->percent = RESYNC_PENDING;
+ if (l > 7 && strcmp(w+l-7, "=REMOTE") == 0)
+ ent->percent = RESYNC_REMOTE;
} else if (ent->percent == RESYNC_NONE &&
w[0] >= '0' &&
w[0] <= '9' &&
--
2.7.5

View File

@ -1,164 +0,0 @@
From 12724c018c964596aa277489fd287d5c3506361a Mon Sep 17 00:00:00 2001
From: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
Date: Fri, 17 Apr 2020 13:55:55 +0200
Subject: [RHEL7.9 PATCH 73/77] Manage, imsm: Write metadata before add
New drive in container always appears as spare. Manager is able to
handle that, and queues the appropriate update to monitor.
No update from mdadm side has to be processed, just insert the drive and
ping the mdmon. Metadata has to be written if no mdmon is running (case
for Raid0 or container without arrays).
If bare drive is added very early on startup (by custom bare rule),
there is a possibility that mdmon was not restarted after switch root. Old
one is not able to handle new drive. New one fails because there is
drive without metadata in container and metadata cannot be loaded.
To prevent this, write spare metadata before adding device
to container. Mdmon will overwrite it (same case as spare migration,
if drive appears it writes the most recent metadata).
Metadata has to be written only on new drive before sysfs_add_disk(),
don't race with mdmon if running.
Signed-off-by: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 6 +-----
super-intel.c | 66 ++++++++++++++++++++++++++++++++++++++---------------------
2 files changed, 44 insertions(+), 28 deletions(-)
diff --git a/Manage.c b/Manage.c
index b22c396..0a5f09b 100644
--- a/Manage.c
+++ b/Manage.c
@@ -994,17 +994,13 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
Kill(dv->devname, NULL, 0, -1, 0);
dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
- if (mdmon_running(tst->container_devnm))
- tst->update_tail = &tst->updates;
if (tst->ss->add_to_super(tst, &disc, dfd,
dv->devname, INVALID_SECTORS)) {
close(dfd);
close(container_fd);
return -1;
}
- if (tst->update_tail)
- flush_metadata_updates(tst);
- else
+ if (!mdmon_running(tst->container_devnm))
tst->ss->sync_metadata(tst);
sra = sysfs_read(container_fd, NULL, 0);
diff --git a/super-intel.c b/super-intel.c
index 562a58c..3a73d2b 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -5809,6 +5809,9 @@ int mark_spare(struct dl *disk)
return ret_val;
}
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d);
+
static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
int fd, char *devname,
unsigned long long data_offset)
@@ -5938,9 +5941,13 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
dd->next = super->disk_mgmt_list;
super->disk_mgmt_list = dd;
} else {
+ /* this is called outside of mdmon
+ * write initial spare metadata
+ * mdmon will overwrite it.
+ */
dd->next = super->disks;
super->disks = dd;
- super->updates_pending++;
+ write_super_imsm_spare(super, dd);
}
return 0;
@@ -5979,15 +5986,15 @@ static union {
struct imsm_super anchor;
} spare_record __attribute__ ((aligned(MAX_SECTOR_SIZE)));
-/* spare records have their own family number and do not have any defined raid
- * devices
- */
-static int write_super_imsm_spares(struct intel_super *super, int doclose)
+
+static int write_super_imsm_spare(struct intel_super *super, struct dl *d)
{
struct imsm_super *mpb = super->anchor;
struct imsm_super *spare = &spare_record.anchor;
__u32 sum;
- struct dl *d;
+
+ if (d->index != -1)
+ return 1;
spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super));
spare->generation_num = __cpu_to_le32(1UL);
@@ -6000,28 +6007,41 @@ static int write_super_imsm_spares(struct intel_super *super, int doclose)
snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
MPB_SIGNATURE MPB_VERSION_RAID0);
- for (d = super->disks; d; d = d->next) {
- if (d->index != -1)
- continue;
+ spare->disk[0] = d->disk;
+ if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
+ spare->attributes |= MPB_ATTRIB_2TB_DISK;
- spare->disk[0] = d->disk;
- if (__le32_to_cpu(d->disk.total_blocks_hi) > 0)
- spare->attributes |= MPB_ATTRIB_2TB_DISK;
+ if (super->sector_size == 4096)
+ convert_to_4k_imsm_disk(&spare->disk[0]);
- if (super->sector_size == 4096)
- convert_to_4k_imsm_disk(&spare->disk[0]);
+ sum = __gen_imsm_checksum(spare);
+ spare->family_num = __cpu_to_le32(sum);
+ spare->orig_family_num = 0;
+ sum = __gen_imsm_checksum(spare);
+ spare->check_sum = __cpu_to_le32(sum);
- sum = __gen_imsm_checksum(spare);
- spare->family_num = __cpu_to_le32(sum);
- spare->orig_family_num = 0;
- sum = __gen_imsm_checksum(spare);
- spare->check_sum = __cpu_to_le32(sum);
+ if (store_imsm_mpb(d->fd, spare)) {
+ pr_err("failed for device %d:%d %s\n",
+ d->major, d->minor, strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+/* spare records have their own family number and do not have any defined raid
+ * devices
+ */
+static int write_super_imsm_spares(struct intel_super *super, int doclose)
+{
+ struct dl *d;
+
+ for (d = super->disks; d; d = d->next) {
+ if (d->index != -1)
+ continue;
- if (store_imsm_mpb(d->fd, spare)) {
- pr_err("failed for device %d:%d %s\n",
- d->major, d->minor, strerror(errno));
+ if (write_super_imsm_spare(super, d))
return 1;
- }
+
if (doclose) {
close(d->fd);
d->fd = -1;
--
2.7.5

View File

@ -1,57 +0,0 @@
From 5cfb79dea26d9d7266f79c7c196a1a9f70c16a28 Mon Sep 17 00:00:00 2001
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Date: Tue, 16 Apr 2019 18:08:17 +0200
Subject: [RHEL7.9 PATCH 74/77] Assemble: print error message if mdadm fails
assembling with --uuid option
When mdadm tries to assemble one working device and one zeroed-out device,
it fails but prints a success message because the --uuid option is given.
The following script always reproduces it.
dd if=/dev/zero of=/dev/ram0 oflag=direct
dd if=/dev/zero of=/dev/ram1 oflag=direct
./mdadm -C /dev/md111 -e 1.2 --uuid="12345678:12345678:12345678:12345678" \
-l1 -n2 /dev/ram0 /dev/ram1
./mdadm -S /dev/md111
dd if=/dev/zero of=/dev/ram1 oflag=direct
./mdadm -A /dev/md111 --uuid="12345678:12345678:12345678:12345678" \
/dev/ram0 /dev/ram1
Following is message from mdadm.
mdadm: No super block found on /dev/ram1 (Expected magic a92b4efc, got 00000000)
mdadm: no RAID superblock on /dev/ram1
mdadm: /dev/md111 assembled from 1 drive - need all 2 to start it (use --run to insist).
The mdadm say that it assembled but mdadm does not create /dev/md111.
The message is wrong.
After applying this patch, mdadm reports error correctly as following.
mdadm: No super block found on /dev/ram1 (Expected magic a92b4efc, got 00000000)
mdadm: no RAID superblock on /dev/ram1
mdadm: /dev/ram1 has no superblock - assembly aborted
Signed-off-by: Gioh Kim <gi-oh.kim@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Assemble.c b/Assemble.c
index 6b5a7c8..2ed5884 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -269,7 +269,7 @@ static int select_devices(struct mddev_dev *devlist,
if (auto_assem || !inargv)
/* Ignore unrecognised devices during auto-assembly */
goto loop;
- if (ident->uuid_set || ident->name[0] ||
+ if (ident->name[0] ||
ident->super_minor != UnSet)
/* Ignore unrecognised device if looking for
* specific array */
--
2.7.5

View File

@ -1,29 +0,0 @@
From ec7d7ceefc1c2b9ba82cac1ba0f6a34d41a4a913 Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Mon, 4 May 2020 12:27:45 -0400
Subject: [RHEL7.9 PATCH 75/77] clean up meaning of small typo
Clean up the typo which leads to wrong understanding.
Signed-off-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index a3494a1..9e7cb96 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -2878,7 +2878,7 @@ operation, as described below under LAYOUT CHANGES.
.SS CHUNK-SIZE AND LAYOUT CHANGES
-Changing the chunk-size of layout without also changing the number of
+Changing the chunk-size or layout without also changing the number of
devices as the same time will involve re-writing all blocks in-place.
To ensure against data loss in the case of a crash, a
.B --backup-file
--
2.7.5

View File

@ -1,95 +0,0 @@
From 7b99edab2834d5d08ef774b4cff784caaa1a186f Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Tue, 5 May 2020 12:17:17 +0200
Subject: [RHEL7.9 PATCH 76/77] Assemble.c: respect force flag.
If the array is dirty handler will set resync_start to 0 to inform kernel
that resync is needed. RWH affects only raid456 module, for other
levels array will be started even array is degraded and resync cannot be
performed.
Force is really meaningful for raid456. If array is degraded and resync
is requested, kernel will reject an attempt to start the array. To
respect force, it has to be marked as clean (this will be done for each
array without PPL) and remove the resync request (only for raid 456).
Data corruption may occur so proper warning is added.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 51 ++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 38 insertions(+), 13 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 2ed5884..3e5d4e6 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -2030,6 +2030,15 @@ int assemble_container_content(struct supertype *st, int mdfd,
free(avail);
return err;
}
+ } else if (c->force) {
+ /* Set the array as 'clean' so that we can proceed with starting
+ * it even if we don't have all devices. Mdmon doesn't care
+ * if the dirty flag is set in metadata, it will start managing
+ * it anyway.
+ * This is really important for raid456 (RWH case), other levels
+ * are started anyway.
+ */
+ content->array.state |= 1;
}
if (enough(content->array.level, content->array.raid_disks,
@@ -2049,20 +2058,36 @@ int assemble_container_content(struct supertype *st, int mdfd,
}
free(avail);
- if (c->runstop <= 0 &&
- (working + preexist + expansion) <
- content->array.working_disks) {
- if (c->export && result)
- *result |= INCR_UNSAFE;
- else if (c->verbose >= 0) {
- pr_err("%s assembled with %d device%s",
- chosen_name, preexist + working,
- preexist + working == 1 ? "":"s");
- if (preexist)
- fprintf(stderr, " (%d new)", working);
- fprintf(stderr, " but not safe to start\n");
+ if ((working + preexist + expansion) < content->array.working_disks) {
+ if (c->runstop <= 0) {
+ if (c->export && result)
+ *result |= INCR_UNSAFE;
+ else if (c->verbose >= 0) {
+ pr_err("%s assembled with %d device%s",
+ chosen_name, preexist + working,
+ preexist + working == 1 ? "":"s");
+ if (preexist)
+ fprintf(stderr, " (%d new)", working);
+ fprintf(stderr, " but not safe to start\n");
+ if (c->force)
+ pr_err("Consider --run to start array as degraded.\n");
+ }
+ return 1;
+ } else if (content->array.level >= 4 &&
+ content->array.level <= 6 &&
+ content->resync_start != MaxSector &&
+ c->force) {
+ /* Don't inform the kernel that the array is not
+ * clean and requires resync.
+ */
+ content->resync_start = MaxSector;
+ err = sysfs_set_num(content, NULL, "resync_start",
+ MaxSector);
+ if (err)
+ return 1;
+ pr_err("%s array state forced to clean. It may cause data corruption.\n",
+ chosen_name);
}
- return 1;
}
--
2.7.5

View File

@ -1,45 +0,0 @@
From 3b7aae927bdb6e150d1aaf3aaf0d183a06abda0b Mon Sep 17 00:00:00 2001
From: Donald Buczek <buczek@molgen.mpg.de>
Date: Wed, 13 May 2020 15:16:46 +0200
Subject: [RHEL7.9 PATCH 77/77] mdcheck: Log when done
Currently mdcheck (when called with `--duration`) logs only the
beginning of the check, the pausing and the continuation but not the
completion.
So, log the completion, too, so that it can be determined how long the
raid check took.
2020-05-08T18:00:02+02:00 deadpool root: mdcheck start checking /dev/md0
2020-05-08T18:00:02+02:00 deadpool root: mdcheck start checking /dev/md1
2020-05-09T15:32:04+02:00 deadpool root: mdcheck finished checking /dev/md1
2020-05-09T17:38:04+02:00 deadpool root: mdcheck finished checking /dev/md0
Cc: linux-raid@vger.kernel.org
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
misc/mdcheck | 2 ++
1 file changed, 2 insertions(+)
diff --git a/misc/mdcheck b/misc/mdcheck
index 42d4094..700c3e2 100644
--- a/misc/mdcheck
+++ b/misc/mdcheck
@@ -125,11 +125,13 @@ do
do
eval fl=\$MD_${i}_fl
eval sys=\$MD_${i}_sys
+ eval dev=\$MD_${i}_dev
if [ -z "$fl" ]; then continue; fi
if [ "`cat $sys/md/sync_action`" != 'check' ]
then
+ logger -p daemon.info mdcheck finished checking $dev
eval MD_${i}_fl=
rm -f $fl
continue;
--
2.7.5

View File

@ -1,49 +0,0 @@
From 03ab9763f51ddf2030f60f83e76cf9c1b50b726c Mon Sep 17 00:00:00 2001
From: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
Date: Fri, 15 May 2020 11:23:14 +0200
Subject: [PATCH 078/108] Makefile: add EXTRAVERSION support
Add optional EXTRAVERSION parameter to Makefile and allow to mark version
by user friendly label. It might be useful when creating custom
spins of mdadm, or labeling some instance in between major releases.
Signed-off-by: Tkaczyk Mariusz <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 3 ++-
ReadMe.c | 5 ++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index a33319a..0a20b75 100644
--- a/Makefile
+++ b/Makefile
@@ -105,7 +105,8 @@ VERSION = $(shell [ -d .git ] && git describe HEAD | sed 's/mdadm-//')
VERS_DATE = $(shell [ -d .git ] && date --iso-8601 --date="`git log -n1 --format=format:%cd --date=iso --date=short`")
DVERS = $(if $(VERSION),-DVERSION=\"$(VERSION)\",)
DDATE = $(if $(VERS_DATE),-DVERS_DATE="\"$(VERS_DATE)\"",)
-CFLAGS += $(DVERS) $(DDATE)
+DEXTRAVERSION = $(if $(EXTRAVERSION),-DEXTRAVERSION="\" - $(EXTRAVERSION)\"",)
+CFLAGS += $(DVERS) $(DDATE) $(DEXTRAVERSION)
# The glibc TLS ABI requires applications that call clone(2) to set up
# TLS data structures, use pthreads until mdmon implements this support
diff --git a/ReadMe.c b/ReadMe.c
index eaf1042..06b8f7e 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -33,7 +33,10 @@
#ifndef VERS_DATE
#define VERS_DATE "2018-10-01"
#endif
-char Version[] = "mdadm - v" VERSION " - " VERS_DATE "\n";
+#ifndef EXTRAVERSION
+#define EXTRAVERSION ""
+#endif
+char Version[] = "mdadm - v" VERSION " - " VERS_DATE EXTRAVERSION "\n";
/*
* File: ReadMe.c
--
2.7.5

View File

@ -1,284 +0,0 @@
From f4c8a605d2467c0ed25fcba5d27dd56540660e55 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Date: Mon, 18 May 2020 23:53:35 +0200
Subject: [PATCH 079/108] uuid.c: split uuid stuffs from util.c
Currently, 'make raid6check' is build broken since commit b06815989
("mdadm: load default sysfs attributes after assemblation").
/usr/bin/ld: sysfs.o: in function `sysfsline':
sysfs.c:(.text+0x2707): undefined reference to `parse_uuid'
/usr/bin/ld: sysfs.c:(.text+0x271a): undefined reference to `uuid_zero'
/usr/bin/ld: sysfs.c:(.text+0x2721): undefined reference to `uuid_zero'
Apparently, the builds of mdadm and raid6check are coupled to the uuid
functions inside util.c. However, we can't just add util.o to CHECK_OBJS,
which raid6check is built from, because that causes other, worse problems.
So, let's introduce uuid.c as an independent file to fix the
problem; all of its contents are split out of util.c.
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 6 ++--
util.c | 87 -------------------------------------------------
uuid.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+), 90 deletions(-)
create mode 100644 uuid.c
diff --git a/Makefile b/Makefile
index 0a20b75..15d05d1 100644
--- a/Makefile
+++ b/Makefile
@@ -140,7 +140,7 @@ else
ECHO=:
endif
-OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \
+OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o uuid.o util.o maps.o lib.o \
Manage.o Assemble.o Build.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
Incremental.o Dump.o \
@@ -149,13 +149,13 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \
restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \
platform-intel.o probe_roms.o crc32c.o
-CHECK_OBJS = restripe.o sysfs.o maps.o lib.o xmalloc.o dlink.o
+CHECK_OBJS = restripe.o uuid.o sysfs.o maps.o lib.o xmalloc.o dlink.o
SRCS = $(patsubst %.o,%.c,$(OBJS))
INCL = mdadm.h part.h bitmap.h
-MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
+MON_OBJS = mdmon.o monitor.o managemon.o uuid.o util.o maps.o mdstat.o sysfs.o \
policy.o lib.o \
Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \
super-mbr.o super-gpt.o \
diff --git a/util.c b/util.c
index 07f9dc3..579dd42 100644
--- a/util.c
+++ b/util.c
@@ -306,43 +306,6 @@ int md_get_disk_info(int fd, struct mdu_disk_info_s *disk)
return ioctl(fd, GET_DISK_INFO, disk);
}
-/*
- * Parse a 128 bit uuid in 4 integers
- * format is 32 hexx nibbles with options :.<space> separator
- * If not exactly 32 hex digits are found, return 0
- * else return 1
- */
-int parse_uuid(char *str, int uuid[4])
-{
- int hit = 0; /* number of Hex digIT */
- int i;
- char c;
- for (i = 0; i < 4; i++)
- uuid[i] = 0;
-
- while ((c = *str++) != 0) {
- int n;
- if (c >= '0' && c <= '9')
- n = c-'0';
- else if (c >= 'a' && c <= 'f')
- n = 10 + c - 'a';
- else if (c >= 'A' && c <= 'F')
- n = 10 + c - 'A';
- else if (strchr(":. -", c))
- continue;
- else return 0;
-
- if (hit<32) {
- uuid[hit/8] <<= 4;
- uuid[hit/8] += n;
- }
- hit++;
- }
- if (hit == 32)
- return 1;
- return 0;
-}
-
int get_linux_version()
{
struct utsname name;
@@ -611,56 +574,6 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
}
}
-const int uuid_zero[4] = { 0, 0, 0, 0 };
-
-int same_uuid(int a[4], int b[4], int swapuuid)
-{
- if (swapuuid) {
- /* parse uuids are hostendian.
- * uuid's from some superblocks are big-ending
- * if there is a difference, we need to swap..
- */
- unsigned char *ac = (unsigned char *)a;
- unsigned char *bc = (unsigned char *)b;
- int i;
- for (i = 0; i < 16; i += 4) {
- if (ac[i+0] != bc[i+3] ||
- ac[i+1] != bc[i+2] ||
- ac[i+2] != bc[i+1] ||
- ac[i+3] != bc[i+0])
- return 0;
- }
- return 1;
- } else {
- if (a[0]==b[0] &&
- a[1]==b[1] &&
- a[2]==b[2] &&
- a[3]==b[3])
- return 1;
- return 0;
- }
-}
-
-void copy_uuid(void *a, int b[4], int swapuuid)
-{
- if (swapuuid) {
- /* parse uuids are hostendian.
- * uuid's from some superblocks are big-ending
- * if there is a difference, we need to swap..
- */
- unsigned char *ac = (unsigned char *)a;
- unsigned char *bc = (unsigned char *)b;
- int i;
- for (i = 0; i < 16; i += 4) {
- ac[i+0] = bc[i+3];
- ac[i+1] = bc[i+2];
- ac[i+2] = bc[i+1];
- ac[i+3] = bc[i+0];
- }
- } else
- memcpy(a, b, 16);
-}
-
char *__fname_from_uuid(int id[4], int swap, char *buf, char sep)
{
int i, j;
diff --git a/uuid.c b/uuid.c
new file mode 100644
index 0000000..94b5abd
--- /dev/null
+++ b/uuid.c
@@ -0,0 +1,112 @@
+/*
+ * mdadm - manage Linux "md" devices aka RAID arrays.
+ *
+ * Copyright (C) 2001-2013 Neil Brown <neilb@suse.de>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Neil Brown
+ * Email: <neilb@suse.de>
+ */
+
+#include <string.h>
+
+const int uuid_zero[4] = { 0, 0, 0, 0 };
+
+int same_uuid(int a[4], int b[4], int swapuuid)
+{
+ if (swapuuid) {
+ /* parse uuids are hostendian.
+ * uuid's from some superblocks are big-ending
+ * if there is a difference, we need to swap..
+ */
+ unsigned char *ac = (unsigned char *)a;
+ unsigned char *bc = (unsigned char *)b;
+ int i;
+ for (i = 0; i < 16; i += 4) {
+ if (ac[i+0] != bc[i+3] ||
+ ac[i+1] != bc[i+2] ||
+ ac[i+2] != bc[i+1] ||
+ ac[i+3] != bc[i+0])
+ return 0;
+ }
+ return 1;
+ } else {
+ if (a[0]==b[0] &&
+ a[1]==b[1] &&
+ a[2]==b[2] &&
+ a[3]==b[3])
+ return 1;
+ return 0;
+ }
+}
+
+void copy_uuid(void *a, int b[4], int swapuuid)
+{
+ if (swapuuid) {
+ /* parse uuids are hostendian.
+ * uuid's from some superblocks are big-ending
+ * if there is a difference, we need to swap..
+ */
+ unsigned char *ac = (unsigned char *)a;
+ unsigned char *bc = (unsigned char *)b;
+ int i;
+ for (i = 0; i < 16; i += 4) {
+ ac[i+0] = bc[i+3];
+ ac[i+1] = bc[i+2];
+ ac[i+2] = bc[i+1];
+ ac[i+3] = bc[i+0];
+ }
+ } else
+ memcpy(a, b, 16);
+}
+
+/*
+ * Parse a 128 bit uuid in 4 integers
+ * format is 32 hexx nibbles with options :.<space> separator
+ * If not exactly 32 hex digits are found, return 0
+ * else return 1
+ */
+int parse_uuid(char *str, int uuid[4])
+{
+ int hit = 0; /* number of Hex digIT */
+ int i;
+ char c;
+ for (i = 0; i < 4; i++)
+ uuid[i] = 0;
+
+ while ((c = *str++) != 0) {
+ int n;
+ if (c >= '0' && c <= '9')
+ n = c-'0';
+ else if (c >= 'a' && c <= 'f')
+ n = 10 + c - 'a';
+ else if (c >= 'A' && c <= 'F')
+ n = 10 + c - 'A';
+ else if (strchr(":. -", c))
+ continue;
+ else return 0;
+
+ if (hit<32) {
+ uuid[hit/8] <<= 4;
+ uuid[hit/8] += n;
+ }
+ hit++;
+ }
+ if (hit == 32)
+ return 1;
+ return 0;
+}
--
2.7.5

View File

@ -1,34 +0,0 @@
From 7d90f7603af6b59e7144cef6617a1e9dd42161bd Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Mon, 18 May 2020 20:19:53 -0400
Subject: [PATCH 080/108] Include count for \0 character when using strncpy to
implement strdup.
We have to include the \0 character in the length when copying a
string with strncpy() for which length was found with strlen().
Otherwise the destination will not get null terminated - except that
we explicitly zeroed it out earlier.
This quiets down the compiler's warnings.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
dlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/dlink.c b/dlink.c
index 3efa94b..69aa7aa 100644
--- a/dlink.c
+++ b/dlink.c
@@ -63,7 +63,7 @@ char *dl_strndup(char *s, int l)
if (s == NULL)
return NULL;
n = dl_newv(char, l+1);
- strncpy(n, s, l);
+ strncpy(n, s, l+1);
n[l] = 0;
return n;
}
--
2.7.5

View File

@ -1,53 +0,0 @@
From d92cee7b374db9944b63bdd6c1784a2dd90ee9ca Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Date: Mon, 18 May 2020 23:53:36 +0200
Subject: [PATCH 081/108] =?UTF-8?q?restripe:=20fix=20ignoring=20return=20v?=
=?UTF-8?q?alue=20of=20=E2=80=98read=E2=80=99=20and=20lseek?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Got below error when run "make everything".
restripe.c: In function test_stripes:
restripe.c:870:4: error: ignoring return value of read, declared with attribute warn_unused_result [-Werror=unused-result]
read(source[i], stripes[i], chunk_size);
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Fix it by checking the return value of read, and freeing memory
in the failure case.
And check the return value of lseek as well per Jes's comment.
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
restripe.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/restripe.c b/restripe.c
index 31b07e8..86e1d00 100644
--- a/restripe.c
+++ b/restripe.c
@@ -866,8 +866,16 @@ int test_stripes(int *source, unsigned long long *offsets,
int disk;
for (i = 0 ; i < raid_disks ; i++) {
- lseek64(source[i], offsets[i]+start, 0);
- read(source[i], stripes[i], chunk_size);
+ if ((lseek64(source[i], offsets[i]+start, 0) < 0) ||
+ (read(source[i], stripes[i], chunk_size) !=
+ chunk_size)) {
+ free(q);
+ free(p);
+ free(blocks);
+ free(stripes);
+ free(stripe_buf);
+ return -1;
+ }
}
for (i = 0 ; i < data_disks ; i++) {
int disk = geo_map(i, start/chunk_size, raid_disks,
--
2.7.5

View File

@ -1,33 +0,0 @@
From 7758ada9f3872cc9cb4c76c733dbc553562b3d7d Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Fri, 29 May 2020 08:31:36 +0200
Subject: [PATCH 082/108] Block overwriting existing links while manual
assembly
Manual assembly with existing link caused overwriting
this link. Add checking link and block this situation.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/Assemble.c b/Assemble.c
index 3e5d4e6..ed0ddfb 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1482,6 +1482,10 @@ try_again:
name = content->name;
break;
}
+ if (mddev && map_by_name(&map, mddev) != NULL) {
+ pr_err("Cannot create device with %s because is in use\n", mddev);
+ goto out;
+ }
if (!auto_assem)
/* If the array is listed in mdadm.conf or on
* command line, then we trust the name
--
2.7.5

View File

@ -1,76 +0,0 @@
From 2cf0433063203fca10d26629c9e090b51fb1d806 Mon Sep 17 00:00:00 2001
From: David Favro <dfavro@meta-dynamic.com>
Date: Sat, 23 May 2020 08:24:59 -0400
Subject: [PATCH 083/108] Detect too-small device: error rather than
underflow/crash
For 1.x metadata, when the user requested creation of an array on
component devices that were too small even to hold the superblock,
an undetected integer wraparound (underflow) resulted in an enormous
computed size which resulted in various follow-on errors such as
floating-point exception.
This patch detects this condition, prints a reasonable diagnostic
message, and refuses to continue.
Signed-off-by: David Favro <dfavro@meta-dynamic.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super1.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/super1.c b/super1.c
index e0d80be..7664883 100644
--- a/super1.c
+++ b/super1.c
@@ -2753,6 +2753,7 @@ static int validate_geometry1(struct supertype *st, int level,
unsigned long long ldsize, devsize;
int bmspace;
unsigned long long headroom;
+ unsigned long long overhead;
int fd;
if (level == LEVEL_CONTAINER) {
@@ -2785,10 +2786,6 @@ static int validate_geometry1(struct supertype *st, int level,
close(fd);
devsize = ldsize >> 9;
- if (devsize < 24) {
- *freesize = 0;
- return 0;
- }
/* creating: allow suitable space for bitmap or PPL */
if (consistency_policy == CONSISTENCY_POLICY_PPL)
@@ -2829,15 +2826,27 @@ static int validate_geometry1(struct supertype *st, int level,
case 0: /* metadata at end. Round down and subtract space to reserve */
devsize = (devsize & ~(4ULL*2-1));
/* space for metadata, bblog, bitmap/ppl */
- devsize -= 8*2 + 8 + bmspace;
+ overhead = 8*2 + 8 + bmspace;
+ if (devsize < overhead) /* detect underflow */
+ goto dev_too_small_err;
+ devsize -= overhead;
break;
case 1:
case 2:
+ if (devsize < data_offset) /* detect underflow */
+ goto dev_too_small_err;
devsize -= data_offset;
break;
}
*freesize = devsize;
return 1;
+
+/* Error condition, device cannot even hold the overhead. */
+dev_too_small_err:
+ fprintf(stderr, "device %s is too small (%lluK) for "
+ "required metadata!\n", subdev, devsize>>1);
+ *freesize = 0;
+ return 0;
}
void *super1_make_v0(struct supertype *st, struct mdinfo *info, mdp_super_t *sb0)
--
2.7.5

View File

@ -1,123 +0,0 @@
From 8e41153c91cdce696618c527906648625217470c Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Thu, 28 May 2020 16:52:24 +0200
Subject: [PATCH 084/108] Use more secure HTTPS URLs
All URLs in the source are available over HTTPS, so convert all URLs to
HTTPS with the command below.
git grep -l 'http://' | xargs sed -i 's,http://,https://,g'
Revert the changes to announcement files `ANNOUNCE-*` as requested by
the maintainer.
Cc: linux-raid@vger.kernel.org
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
external-reshape-design.txt | 2 +-
mdadm.8.in | 6 +++---
mdadm.spec | 4 ++--
raid6check.8 | 2 +-
restripe.c | 2 +-
udev-md-raid-safe-timeouts.rules | 2 +-
6 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/external-reshape-design.txt b/external-reshape-design.txt
index 10c57cc..e4cf4e1 100644
--- a/external-reshape-design.txt
+++ b/external-reshape-design.txt
@@ -277,4 +277,4 @@ sync_action
...
-[1]: Linux kernel design patterns - part 3, Neil Brown http://lwn.net/Articles/336262/
+[1]: Linux kernel design patterns - part 3, Neil Brown https://lwn.net/Articles/336262/
diff --git a/mdadm.8.in b/mdadm.8.in
index 9e7cb96..7f32762 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -367,7 +367,7 @@ Use the Intel(R) Matrix Storage Manager metadata format. This creates a
which is managed in a similar manner to DDF, and is supported by an
option-rom on some platforms:
.IP
-.B http://www.intel.com/design/chipsets/matrixstorage_sb.htm
+.B https://www.intel.com/design/chipsets/matrixstorage_sb.htm
.PP
.RE
@@ -3407,7 +3407,7 @@ was previously known as
For further information on mdadm usage, MD and the various levels of
RAID, see:
.IP
-.B http://raid.wiki.kernel.org/
+.B https://raid.wiki.kernel.org/
.PP
(based upon Jakob \(/Ostergaard's Software\-RAID.HOWTO)
.PP
@@ -3415,7 +3415,7 @@ The latest version of
.I mdadm
should always be available from
.IP
-.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.B https://www.kernel.org/pub/linux/utils/raid/mdadm/
.PP
Related man pages:
.PP
diff --git a/mdadm.spec b/mdadm.spec
index 1c66894..506ea33 100644
--- a/mdadm.spec
+++ b/mdadm.spec
@@ -2,8 +2,8 @@ Summary: mdadm is used for controlling Linux md devices (aka RAID arrays)
Name: mdadm
Version: 4.1
Release: 1
-Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
-URL: http://neil.brown.name/blog/mdadm
+Source: https://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz
+URL: https://neil.brown.name/blog/mdadm
License: GPL
Group: Utilities/System
BuildRoot: %{_tmppath}/%{name}-root
diff --git a/raid6check.8 b/raid6check.8
index 5003343..8999ca8 100644
--- a/raid6check.8
+++ b/raid6check.8
@@ -86,7 +86,7 @@ The latest version of
.I raid6check
should always be available from
.IP
-.B http://www.kernel.org/pub/linux/utils/raid/mdadm/
+.B https://www.kernel.org/pub/linux/utils/raid/mdadm/
.PP
Related man pages:
.PP
diff --git a/restripe.c b/restripe.c
index 86e1d00..a7a7229 100644
--- a/restripe.c
+++ b/restripe.c
@@ -333,7 +333,7 @@ void make_tables(void)
/* Compute log and inverse log */
/* Modified code from:
- * http://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
+ * https://web.eecs.utk.edu/~plank/plank/papers/CS-96-332.html
*/
b = 1;
raid6_gflog[0] = 0;
diff --git a/udev-md-raid-safe-timeouts.rules b/udev-md-raid-safe-timeouts.rules
index 13c23d8..12bdcaa 100644
--- a/udev-md-raid-safe-timeouts.rules
+++ b/udev-md-raid-safe-timeouts.rules
@@ -13,7 +13,7 @@
#
# You should have received a copy of the GNU General Public License
# along with mdraid-safe-timeouts. If not, see
-# <http://www.gnu.org/licenses/>.
+# <https://www.gnu.org/licenses/>.
# This file causes block devices with Linux RAID (mdadm) signatures to
# attempt to set safe timeouts for the drives involved
--
2.7.5

View File

@ -1,28 +0,0 @@
From bcf40dbb5bf7db9d55a877b805ebb95c2008a132 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Fri, 12 Jun 2020 10:49:11 -0400
Subject: [PATCH 085/108] Update link to Intel page for IMSM
The old design page is gone, so update to the current overview page.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 7f32762..1474602 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -367,7 +367,7 @@ Use the Intel(R) Matrix Storage Manager metadata format. This creates a
which is managed in a similar manner to DDF, and is supported by an
option-rom on some platforms:
.IP
-.B https://www.intel.com/design/chipsets/matrixstorage_sb.htm
+.B https://www.intel.com/content/www/us/en/support/products/122484/memory-and-storage/ssd-software/intel-virtual-raid-on-cpu-intel-vroc.html
.PP
.RE
--
2.7.5

View File

@ -1,64 +0,0 @@
From 77b72fa828132a35c8b2e08d3fb07eea80b11895 Mon Sep 17 00:00:00 2001
From: allenpeng <allenpeng@synology.com>
Date: Fri, 12 Jun 2020 17:00:39 +0800
Subject: [PATCH 086/108] mdadm/Grow: prevent md's fd from being occupied
during delayed time
If we start reshaping on md which shares sub-devices with another
resyncing md, it may be forced to wait for others to complete. mdadm
occupies the md's fd during this time, which prevents the md from being
stopped and the filesystem from being mounted on the md. We can close
md's fd earlier to solve this problem.
Reproducible Steps:
1. create two partitions on sda, sdb, sdc, sdd
2. create raid1 with sda1, sdb1
mdadm -C /dev/md1 --assume-clean -l1 -n2 /dev/sda1 /dev/sdb1
3. create raid5 with sda2, sdb2, sdc2
mdadm -C /dev/md2 --assume-clean -l5 -n3 /dev/sda2 /dev/sdb2 /dev/sdc2
4. start resync at md1
echo repair > /sys/block/md1/md/sync_action
5. reshape raid5 to raid6
mdadm -a /dev/md2 /dev/sdd2
mdadm --grow /dev/md2 -n4 -l6 --backup-file=/root/md2-backup
Now mdadm is occupying the fd of md2, causing md2 unable to be stopped
6.Try to stop md2, an error message shows
mdadm -S /dev/md2
mdadm: Cannot get exclusive access to /dev/md2:Perhaps a running process,
mounted filesystem or active volume group?
Reviewed-by: Alex Wu <alexwu@synology.com>
Reviewed-by: BingJing Chang <bingjingc@synology.com>
Reviewed-by: Danny Shih <dannyshih@synology.com>
Signed-off-by: ChangSyun Peng <allenpeng@synology.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Grow.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Grow.c b/Grow.c
index 764374f..57db7d4 100644
--- a/Grow.c
+++ b/Grow.c
@@ -3517,6 +3517,7 @@ started:
return 0;
}
+ close(fd);
/* Now we just need to kick off the reshape and watch, while
* handling backups of the data...
* This is all done by a forked background process.
@@ -3569,7 +3570,6 @@ started:
mdstat_wait(30 - (delayed-1) * 25);
} while (delayed);
mdstat_close();
- close(fd);
if (check_env("MDADM_GROW_VERIFY"))
fd = open(devname, O_RDONLY | O_DIRECT);
else
--
2.7.5

View File

@ -1,36 +0,0 @@
From 138a9e9bbe2622eafc90c976b82f3d84895dbebd Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 27 Jul 2020 09:14:20 +0800
Subject: [PATCH 087/108] Specify nodes number when updating cluster nodes
Currently mdadm allows updating cluster nodes without specifying --nodes. This can write
a superblock with zero nodes, which can break the current cluster. Add a check to avoid this problem.
v2: c.update must be checked first to avoid a NULL pointer dereference
v3: Fix a typo pointed out by Wol
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mdadm.c b/mdadm.c
index 13dc24e..1b3467f 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1433,6 +1433,11 @@ int main(int argc, char *argv[])
}
}
+ if (c.update && strcmp(c.update, "nodes") == 0 && c.nodes == 0) {
+ pr_err("Please specify nodes number with --nodes\n");
+ exit(1);
+ }
+
if (c.backup_file && data_offset != INVALID_SECTORS) {
pr_err("--backup-file and --data-offset are incompatible\n");
exit(2);
--
2.7.5

View File

@ -1,32 +0,0 @@
From 5e592e1ed809b94670872b7a4629317fc1c8a5c1 Mon Sep 17 00:00:00 2001
From: Winston Weinert <winston@ml1.net>
Date: Wed, 22 Jul 2020 08:33:22 -0500
Subject: [PATCH 088/108] mdadm/md.4: update path to in-kernel-tree
documentation
Documentation/md.txt was renamed to Documentation/admin-guide/md.rst
in linux commit 9d85025b0418163fae079c9ba8f8445212de8568 (Oct 26,
2016).
Signed-off-by: Winston Weinert <winston@ml1.net>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
md.4 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/md.4 b/md.4
index 0712af2..aecff38 100644
--- a/md.4
+++ b/md.4
@@ -1061,7 +1061,7 @@ which contains various files for providing access to information about
the array.
This interface is documented more fully in the file
-.B Documentation/md.txt
+.B Documentation/admin-guide/md.rst
which is distributed with the kernel sources. That file should be
consulted for full documentation. The following are just a selection
of attribute files that are available.
--
2.7.5

View File

@ -1,34 +0,0 @@
From 5f4184557a98bb641a7889e280265109c73e2f43 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Thu, 6 Aug 2020 13:57:50 +0200
Subject: [PATCH 089/108] manual: update --examine-badblocks
IMSM also supports it.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 1474602..ab832e8 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1695,9 +1695,11 @@ does not report the bitmap for that array.
.TP
.B \-\-examine\-badblocks
List the bad-blocks recorded for the device, if a bad-blocks list has
-been configured. Currently only
+been configured. Currently only
.B 1.x
-metadata supports bad-blocks lists.
+and
+.B IMSM
+metadata support bad-blocks lists.
.TP
.BI \-\-dump= directory
--
2.7.5

View File

@ -1,68 +0,0 @@
From 64bf4dff34301a4b44883a8bc03f7835faef121e Mon Sep 17 00:00:00 2001
From: Lidong Zhong <lidong.zhong@suse.com>
Date: Mon, 14 Sep 2020 10:52:18 +0800
Subject: [PATCH 090/108] Detail: show correct raid level when the array is
inactive
Sometimes the raid level in the output of `mdadm -D /dev/mdX` is
misleading when the array is in inactive state. Here is a testcase for
introduction.
1\ creating a raid1 device with two disks. Specify a different hostname
rather than the real one for later verification.
node1:~ # mdadm --create /dev/md0 --homehost TESTARRAY -o -l 1 -n 2 /dev/sdb
/dev/sdc
2\ remove one of the devices and reboot
3\ show the detail of raid1 device
node1:~ # mdadm -D /dev/md127
/dev/md127:
Version : 1.2
Raid Level : raid0
Total Devices : 1
Persistence : Superblock is persistent
State : inactive
Working Devices : 1
You can see that the "Raid Level" in /dev/md127 is raid0 now.
After step 2\ is done, the degraded raid1 device is recognized
as a "foreign" array in 64-md-raid-assembly.rules. And thus the
timer to activate the raid1 device is not triggered. The array
level returned from GET_ARRAY_INFO ioctl is 0. And the string
shown for "Raid Level" is
str = map_num(pers, array.level);
And the definition of pers is
mapping_t pers[] = {
{ "linear", LEVEL_LINEAR},
{ "raid0", 0},
{ "0", 0}
...
So the misleading "raid0" is shown in this testcase.
Changelog:
v1: don't show "Raid Level" when array is inactive
Signed-off-by: Lidong Zhong <lidong.zhong@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/Detail.c b/Detail.c
index 24eeba0..b6587c8 100644
--- a/Detail.c
+++ b/Detail.c
@@ -224,7 +224,10 @@ int Detail(char *dev, struct context *c)
}
/* Ok, we have some info to print... */
- str = map_num(pers, array.level);
+ if (inactive)
+ str = map_num(pers, info->array.level);
+ else
+ str = map_num(pers, array.level);
if (c->export) {
if (array.raid_disks) {
--
2.7.5

View File

@ -1,29 +0,0 @@
From 2ce091724031e18f522994ffd1e5eb0dc404bcba Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Tue, 15 Sep 2020 15:44:42 +0800
Subject: [PATCH 091/108] Don't create bitmap for raid5 with journal disk
Journal disk and bitmap can't exist at the same time. It needs to check if the raid
has a journal disk when creating bitmap.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/Create.c b/Create.c
index 6f84e5b..0efa19c 100644
--- a/Create.c
+++ b/Create.c
@@ -542,6 +542,7 @@ int Create(struct supertype *st, char *mddev,
if (!s->bitmap_file &&
s->level >= 1 &&
st->ss->add_internal_bitmap &&
+ s->journaldisks == 0 &&
(s->consistency_policy != CONSISTENCY_POLICY_RESYNC &&
s->consistency_policy != CONSISTENCY_POLICY_PPL) &&
(s->write_behind || s->size > 100*1024*1024ULL)) {
--
2.7.5

View File

@ -1,70 +0,0 @@
From e2308733910a157b0a4d4e78721f239d44b91a24 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Wed, 9 Sep 2020 10:31:17 +0200
Subject: [PATCH 092/108] Monitor: refresh mdstat fd after select
After 52209d6ee118 ("Monitor: release /proc/mdstat fd when no arrays
present") mdstat fd is closed if mdstat is empty or cannot be opened.
Consequently the monitor is not able to select on mdstat. Select
doesn't fail because it gets a valid descriptor to a different resource.
As a result, any new event will go unnoticed until the timeout (delay) expires.
Refresh mdstat after wake-up; don't poll on the wrong resource.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 6 +++---
mdstat.c | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 2d6b3b9..80a3200 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -216,8 +216,6 @@ int Monitor(struct mddev_dev *devlist,
if (mdstat)
free_mdstat(mdstat);
mdstat = mdstat_read(oneshot ? 0 : 1, 0);
- if (!mdstat)
- mdstat_close();
for (st = statelist; st; st = st->next)
if (check_array(st, mdstat, c->test, &info,
@@ -238,8 +236,10 @@ int Monitor(struct mddev_dev *devlist,
if (!new_found) {
if (oneshot)
break;
- else
+ else {
mdstat_wait(c->delay);
+ mdstat_close();
+ }
}
c->test = 0;
diff --git a/mdstat.c b/mdstat.c
index 20577a3..48559e6 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -135,7 +135,6 @@ struct mdstat_ent *mdstat_read(int hold, int start)
if (hold && mdstat_fd != -1) {
off_t offset = lseek(mdstat_fd, 0L, 0);
if (offset == (off_t)-1) {
- mdstat_close();
return NULL;
}
fd = dup(mdstat_fd);
@@ -312,7 +311,8 @@ void mdstat_wait(int seconds)
if (mdstat_fd >= 0) {
FD_SET(mdstat_fd, &fds);
maxfd = mdstat_fd;
- }
+ } else
+ return;
tm.tv_sec = seconds;
tm.tv_usec = 0;
select(maxfd + 1, NULL, NULL, &fds, &tm);
--
2.7.5

View File

@ -1,78 +0,0 @@
From 007087d0898a045901e4e120296e6d9b845b20a6 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Wed, 9 Sep 2020 10:31:18 +0200
Subject: [PATCH 093/108] Monitor: stop notifying about containers.
Stop reporting any events from containers but still track them;
this is important for spare migration.
Stop mdmonitor if no redundant array is present in mdstat,
since there is nothing to follow.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 80a3200..aed7a69 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -212,15 +212,24 @@ int Monitor(struct mddev_dev *devlist,
int new_found = 0;
struct state *st, **stp;
int anydegraded = 0;
+ int anyredundant = 0;
if (mdstat)
free_mdstat(mdstat);
mdstat = mdstat_read(oneshot ? 0 : 1, 0);
- for (st = statelist; st; st = st->next)
+ for (st = statelist; st; st = st->next) {
if (check_array(st, mdstat, c->test, &info,
increments, c->prefer))
anydegraded = 1;
+ /* for external arrays, metadata is filled for
+ * containers only
+ */
+ if (st->metadata && st->metadata->ss->external)
+ continue;
+ if (st->err == 0 && !anyredundant)
+ anyredundant = 1;
+ }
/* now check if there are any new devices found in mdstat */
if (c->scan)
@@ -236,6 +245,9 @@ int Monitor(struct mddev_dev *devlist,
if (!new_found) {
if (oneshot)
break;
+ else if (!anyredundant) {
+ break;
+ }
else {
mdstat_wait(c->delay);
mdstat_close();
@@ -542,7 +554,8 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
st->err = 0;
st->percent = RESYNC_NONE;
new_array = 1;
- alert("NewArray", st->devname, NULL, ainfo);
+ if (!is_container)
+ alert("NewArray", st->devname, NULL, ainfo);
}
if (st->utime == array.utime && st->failed == sra->array.failed_disks &&
@@ -676,7 +689,7 @@ static int check_array(struct state *st, struct mdstat_ent *mdstat,
return retval;
disappeared:
- if (!st->err)
+ if (!st->err && !is_container)
alert("DeviceDisappeared", dev, NULL, ainfo);
st->err++;
goto out;
--
2.7.5

View File

@ -1,103 +0,0 @@
From cab9c67d461c65a1138359f9f6d39636466b90e4 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Wed, 9 Sep 2020 10:31:19 +0200
Subject: [PATCH 094/108] mdmonitor: set small delay once
If mdmonitor is awakened by event, set small delay once
to deal with udev and mdadm.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 14 +++++++++++++-
mdadm.h | 2 +-
mdstat.c | 18 +++++++++++++++---
3 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index aed7a69..0fb4f77 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -128,6 +128,7 @@ int Monitor(struct mddev_dev *devlist,
char *mailfrom;
struct alert_info info;
struct mddev_ident *mdlist;
+ int delay_for_event = c->delay;
if (!mailaddr) {
mailaddr = conf_get_mailaddr();
@@ -249,7 +250,18 @@ int Monitor(struct mddev_dev *devlist,
break;
}
else {
- mdstat_wait(c->delay);
+ int wait_result = mdstat_wait(delay_for_event);
+
+ /*
+ * If mdmonitor is awaken by event, set small delay once
+ * to deal with udev and mdadm.
+ */
+ if (wait_result != 0) {
+ if (c->delay > 5)
+ delay_for_event = 5;
+ } else
+ delay_for_event = c->delay;
+
mdstat_close();
}
}
diff --git a/mdadm.h b/mdadm.h
index 399478b..4961c0f 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -628,7 +628,7 @@ struct mdstat_ent {
extern struct mdstat_ent *mdstat_read(int hold, int start);
extern void mdstat_close(void);
extern void free_mdstat(struct mdstat_ent *ms);
-extern void mdstat_wait(int seconds);
+extern int mdstat_wait(int seconds);
extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
extern int mddev_busy(char *devnm);
extern struct mdstat_ent *mdstat_by_component(char *name);
diff --git a/mdstat.c b/mdstat.c
index 48559e6..dd96cca 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -302,7 +302,17 @@ void mdstat_close(void)
mdstat_fd = -1;
}
-void mdstat_wait(int seconds)
+/*
+ * function: mdstat_wait
+ * Description: Function waits for event on mdstat.
+ * Parameters:
+ * seconds - timeout for waiting
+ * Returns:
+ * > 0 - detected event
+ * 0 - timeout
+ * < 0 - detected error
+ */
+int mdstat_wait(int seconds)
{
fd_set fds;
struct timeval tm;
@@ -312,10 +322,12 @@ void mdstat_wait(int seconds)
FD_SET(mdstat_fd, &fds);
maxfd = mdstat_fd;
} else
- return;
+ return -1;
+
tm.tv_sec = seconds;
tm.tv_usec = 0;
- select(maxfd + 1, NULL, NULL, &fds, &tm);
+
+ return select(maxfd + 1, NULL, NULL, &fds, &tm);
}
void mdstat_wait_fd(int fd, const sigset_t *sigmask)
--
2.7.5

View File

@ -1,103 +0,0 @@
From 7f3b2d1d1621cbdc60b5af4a41445391010fe9e1 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Wed, 9 Sep 2020 10:31:20 +0200
Subject: [PATCH 095/108] Check if other Monitor instance running before fork.
Make error message visible to the user.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Monitor.c | 44 ++++++++++++++++++++++++++++----------------
1 file changed, 28 insertions(+), 16 deletions(-)
diff --git a/Monitor.c b/Monitor.c
index 0fb4f77..7fd4808 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -63,6 +63,7 @@ struct alert_info {
};
static int make_daemon(char *pidfile);
static int check_one_sharer(int scan);
+static void write_autorebuild_pid(void);
static void alert(char *event, char *dev, char *disc, struct alert_info *info);
static int check_array(struct state *st, struct mdstat_ent *mdstat,
int test, struct alert_info *info,
@@ -153,6 +154,11 @@ int Monitor(struct mddev_dev *devlist,
info.mailfrom = mailfrom;
info.dosyslog = dosyslog;
+ if (share){
+ if (check_one_sharer(c->scan))
+ return 1;
+ }
+
if (daemonise) {
int rv = make_daemon(pidfile);
if (rv >= 0)
@@ -160,8 +166,7 @@ int Monitor(struct mddev_dev *devlist,
}
if (share)
- if (check_one_sharer(c->scan))
- return 1;
+ write_autorebuild_pid();
if (devlist == NULL) {
mdlist = conf_get_ident(NULL);
@@ -328,8 +333,8 @@ static int check_one_sharer(int scan)
int pid;
FILE *comm_fp;
FILE *fp;
- char comm_path[100];
- char path[100];
+ char comm_path[PATH_MAX];
+ char path[PATH_MAX];
char comm[20];
sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
@@ -356,21 +361,28 @@ static int check_one_sharer(int scan)
}
fclose(fp);
}
- if (scan) {
- if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+ return 0;
+}
+
+static void write_autorebuild_pid()
+{
+ char path[PATH_MAX];
+ int pid;
+ FILE *fp;
+ sprintf(path, "%s/autorebuild.pid", MDMON_DIR);
+
+ if (mkdir(MDMON_DIR, S_IRWXU) < 0 && errno != EEXIST) {
+ pr_err("Can't create autorebuild.pid file\n");
+ } else {
+ fp = fopen(path, "w");
+ if (!fp)
pr_err("Can't create autorebuild.pid file\n");
- } else {
- fp = fopen(path, "w");
- if (!fp)
- pr_err("Cannot create autorebuild.pidfile\n");
- else {
- pid = getpid();
- fprintf(fp, "%d\n", pid);
- fclose(fp);
- }
+ else {
+ pid = getpid();
+ fprintf(fp, "%d\n", pid);
+ fclose(fp);
}
}
- return 0;
}
static void alert(char *event, char *dev, char *disc, struct alert_info *info)
--
2.7.5

View File

@ -1,136 +0,0 @@
From 97b51a2c2d00b79a59f2a8e37134031b0c9e0223 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 14 Oct 2020 13:12:48 +1100
Subject: [PATCH 096/108] Super1: allow RAID0 layout setting to be removed.
Once the RAID0 layout has been set, the RAID0 array cannot be assembled
on an older kernel which doesn't understand layouts.
This is an intentional safety feature, but sometimes people need the
ability to roll-back to a previously working configuration.
So add "--update=layout-unspecified" to remove RAID0 layout information
from the superblock.
Running "--assemble --update=layout-unspecified" will cause the assembly
to fail when run on a newer kernel, but will allow it to work on
an older kernel.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
md.4 | 13 +++++++++++++
mdadm.8.in | 15 +++++++++++++--
mdadm.c | 5 +++--
super1.c | 6 +++++-
4 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/md.4 b/md.4
index aecff38..60fdd27 100644
--- a/md.4
+++ b/md.4
@@ -215,6 +215,19 @@ option or the
.B "--update=layout-alternate"
option.
+Once you have updated the layout you will not be able to mount the array
+on an older kernel. If you need to revert to an older kernel, the
+layout information can be erased with the
+.B "--update=layout-unspecified"
+option. If you use this option to
+.B --assemble
+while running a newer kernel, the array will NOT assemble, but the
+metadata will be updated so that it can be assembled on an older kernel.
+
+Note that setting the layout to "unspecified" removes protections against
+this bug, and you must be sure that the kernel you use matches the
+layout of the array.
+
.SS RAID1
A RAID1 array is also known as a mirrored set (though mirrors tend to
diff --git a/mdadm.8.in b/mdadm.8.in
index ab832e8..34a93a8 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1213,6 +1213,7 @@ argument given to this flag can be one of
.BR no\-ppl ,
.BR layout\-original ,
.BR layout\-alternate ,
+.BR layout\-unspecified ,
.BR metadata ,
or
.BR super\-minor .
@@ -1368,8 +1369,9 @@ The
.B layout\-original
and
.B layout\-alternate
-options are for RAID0 arrays in use before Linux 5.4. If the array was being
-used with Linux 3.13 or earlier, then to assemble the array on a new kernel,
+options are for RAID0 arrays with non-uniform device sizes that were in
+use before Linux 5.4. If the array was being used with Linux 3.13 or
+earlier, then to assemble the array on a new kernel,
.B \-\-update=layout\-original
must be given. If the array was created and used with a kernel from Linux 3.14 to
Linux 5.3, then
@@ -1379,6 +1381,15 @@ will happen normally.
For more information, see
.IR md (4).
+The
+.B layout\-unspecified
+option reverts the effect of
+.B layout\-original
+or
+.B layout\-alternate
+and allows the array to be again used on a kernel prior to Linux 5.3.
+This option should be used with great caution.
+
.TP
.BR \-\-freeze\-reshape
Option is intended to be used in start-up scripts during initrd boot phase.
diff --git a/mdadm.c b/mdadm.c
index 1b3467f..493d70e 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -796,7 +796,8 @@ int main(int argc, char *argv[])
if (strcmp(c.update, "revert-reshape") == 0)
continue;
if (strcmp(c.update, "layout-original") == 0 ||
- strcmp(c.update, "layout-alternate") == 0)
+ strcmp(c.update, "layout-alternate") == 0 ||
+ strcmp(c.update, "layout-unspecified") == 0)
continue;
if (strcmp(c.update, "byteorder") == 0) {
if (ss) {
@@ -828,7 +829,7 @@ int main(int argc, char *argv[])
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
" 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
- " 'layout-original', 'layout-alternate'\n"
+ " 'layout-original', 'layout-alternate', 'layout-unspecified'\n"
);
exit(outf == stdout ? 0 : 2);
diff --git a/super1.c b/super1.c
index 7664883..8b0d6ff 100644
--- a/super1.c
+++ b/super1.c
@@ -1551,11 +1551,15 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
else if (strcmp(update, "nofailfast") == 0)
sb->devflags &= ~FailFast1;
else if (strcmp(update, "layout-original") == 0 ||
- strcmp(update, "layout-alternate") == 0) {
+ strcmp(update, "layout-alternate") == 0 ||
+ strcmp(update, "layout-unspecified") == 0) {
if (__le32_to_cpu(sb->level) != 0) {
pr_err("%s: %s only supported for RAID0\n",
devname?:"", update);
rv = -1;
+ } else if (strcmp(update, "layout-unspecified") == 0) {
+ sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+ sb->layout = 0;
} else {
sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
sb->layout = __cpu_to_le32(update[7] == 'o' ? 1 : 2);
--
2.7.5

Some files were not shown because too many files have changed in this diff Show More