import mdadm-4.1-13.el8

This commit is contained in:
CentOS Sources 2020-04-28 05:36:31 -04:00 committed by Andrew Lukoshko
parent df56316c8f
commit a5476d5870
42 changed files with 3713 additions and 3 deletions

View File

@ -0,0 +1,109 @@
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 7 May 2019 16:08:47 +0200
Subject: [RHEL7.8 PATCH V2 25/47] mdmon: fix wrong array state when disk fails
during mdmon startup
If a member drive disappears and is set faulty by the kernel during
mdmon startup, after ss->load_container() but before manage_new(), mdmon
will try to readd the faulty drive to the array and start rebuilding.
Metadata on the active drive is updated, but the faulty drive is not
removed from the array and is left in a "blocked" state and any write
request to the array will block. If the faulty drive reappears in the
system e.g. after a reboot, the array will not assemble because metadata
on the drives will be incompatible (at least on imsm).
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
is an extension for the "GET_DEVS" option and causes all member devices
to be returned, even if the associated block device has been removed.
Use this option in manage_new() to include the faulty device on the
active_array's devices list. Mdmon will then properly remove the faulty
device from the array and update the metadata to reflect the degraded
state.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
managemon.c | 2 +-
mdadm.h | 1 +
super-intel.c | 2 +-
sysfs.c | 23 ++++++++++++++---------
4 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/managemon.c b/managemon.c
index 29b91ba..200cf83 100644
--- a/managemon.c
+++ b/managemon.c
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
mdi = sysfs_read(-1, mdstat->devnm,
GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
- GET_LAYOUT);
+ GET_LAYOUT|GET_DEVS_ALL);
if (!mdi)
return;
diff --git a/mdadm.h b/mdadm.h
index 705bd9b..427cc52 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
GET_ERROR = (1 << 24),
GET_ARRAY_STATE = (1 << 25),
GET_CONSISTENCY_POLICY = (1 << 26),
+ GET_DEVS_ALL = (1 << 27),
};
/* If fd >= 0, get the array it is open on,
diff --git a/super-intel.c b/super-intel.c
index 2ba045a..4fd5e84 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
- if (state & DS_FAULTY) {
+ if (disk && (state & DS_FAULTY)) {
if (mark_failure(super, dev, disk, ord_to_idx(ord)))
super->updates_pending++;
}
diff --git a/sysfs.c b/sysfs.c
index df6fdda..2dd9ab6 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
/* assume this is a stale reference to a hot
* removed device
*/
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ free(dev);
+ continue;
+ }
+ } else {
+ sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
}
- sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
- /* special case check for block devices that can go 'offline' */
- strcpy(dbase, "block/device/state");
- if (load_sys(fname, buf, sizeof(buf)) == 0 &&
- strncmp(buf, "offline", 7) == 0) {
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ /* special case check for block devices that can go 'offline' */
+ strcpy(dbase, "block/device/state");
+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&
+ strncmp(buf, "offline", 7) == 0) {
+ free(dev);
+ continue;
+ }
}
/* finally add this disk to the array */
--
2.7.5

View File

@ -0,0 +1,212 @@
From 4ec389e3f0c1233f5aa2d5b4e63d96e33d2a37f0 Mon Sep 17 00:00:00 2001
From: Roman Sobanski <roman.sobanski@intel.com>
Date: Tue, 2 Jul 2019 13:29:27 +0200
Subject: [RHEL7.8 PATCH V2 26/47] Enable probe_roms to scan more than 6 roms.
In some cases, if more than 6 option ROMs exist, the resource for a
particular controller may not be found. Change the method for storing
adapter_rom_resources from an array to a list.
Signed-off-by: Roman Sobanski <roman.sobanski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
probe_roms.c | 98 ++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 56 insertions(+), 42 deletions(-)
diff --git a/probe_roms.c b/probe_roms.c
index b0b0883..7ea04c7 100644
--- a/probe_roms.c
+++ b/probe_roms.c
@@ -35,6 +35,9 @@ static const int rom_len = 0xf0000 - 0xc0000; /* option-rom memory region */
static int _sigbus;
static unsigned long rom_align;
+static void roms_deinit(void);
+static int roms_init(void);
+
static void sigbus(int sig)
{
_sigbus = 1;
@@ -75,6 +78,7 @@ void probe_roms_exit(void)
munmap(rom_mem, rom_len);
rom_mem = MAP_FAILED;
}
+ roms_deinit();
}
int probe_roms_init(unsigned long align)
@@ -91,6 +95,9 @@ int probe_roms_init(unsigned long align)
else
return -1;
+ if (roms_init())
+ return -1;
+
if (signal(SIGBUS, sigbus) == SIG_ERR)
rc = -1;
if (rc == 0) {
@@ -131,6 +138,7 @@ struct resource {
unsigned long end;
unsigned long data;
const char *name;
+ struct resource *next;
};
static struct resource system_rom_resource = {
@@ -147,37 +155,7 @@ static struct resource extension_rom_resource = {
.end = 0xeffff,
};
-static struct resource adapter_rom_resources[] = { {
- .name = "Adapter ROM",
- .start = 0xc8000,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-} };
+static struct resource *adapter_rom_resources;
static struct resource video_rom_resource = {
.name = "Video ROM",
@@ -186,8 +164,35 @@ static struct resource video_rom_resource = {
.end = 0xc7fff,
};
+static int roms_init(void)
+{
+ adapter_rom_resources = malloc(sizeof(struct resource));
+ if (adapter_rom_resources == NULL)
+ return 1;
+ adapter_rom_resources->name = "Adapter ROM";
+ adapter_rom_resources->start = 0xc8000;
+ adapter_rom_resources->data = 0;
+ adapter_rom_resources->end = 0;
+ adapter_rom_resources->next = NULL;
+ return 0;
+}
+
+static void roms_deinit(void)
+{
+ struct resource *res;
+
+ res = adapter_rom_resources;
+ while (res) {
+ struct resource *tmp = res;
+
+ res = res->next;
+ free(tmp);
+ }
+}
+
#define ROMSIGNATURE 0xaa55
+
static int romsignature(const unsigned char *rom)
{
const unsigned short * const ptr = (const unsigned short *)rom;
@@ -208,16 +213,14 @@ static int romchecksum(const unsigned char *rom, unsigned long length)
int scan_adapter_roms(scan_fn fn)
{
/* let scan_fn examing each of the adapter roms found by probe_roms */
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
int found;
if (rom_fd < 0)
return 0;
found = 0;
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
- struct resource *res = &adapter_rom_resources[i];
-
+ while (res) {
if (res->start) {
found = fn(isa_bus_to_virt(res->start),
isa_bus_to_virt(res->end),
@@ -226,6 +229,7 @@ int scan_adapter_roms(scan_fn fn)
break;
} else
break;
+ res = res->next;
}
return found;
@@ -241,14 +245,14 @@ void probe_roms(void)
const void *rom;
unsigned long start, length, upper;
unsigned char c;
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
__u16 val=0;
if (rom_fd < 0)
return;
/* video rom */
- upper = adapter_rom_resources[0].start;
+ upper = res->start;
for (start = video_rom_resource.start; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
@@ -283,8 +287,9 @@ void probe_roms(void)
upper = extension_rom_resource.start;
}
+ struct resource *prev_res = res;
/* check for adapter roms on 2k boundaries */
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += rom_align) {
+ for (; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
@@ -308,10 +313,19 @@ void probe_roms(void)
if (!length || start + length > upper || !romchecksum(rom, length))
continue;
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].data = start + (unsigned long) val;
- adapter_rom_resources[i].end = start + length - 1;
+ if (res == NULL) {
+ res = calloc(1, sizeof(struct resource));
+ if (res == NULL)
+ return;
+ prev_res->next = res;
+ }
+
+ res->start = start;
+ res->data = start + (unsigned long)val;
+ res->end = start + length - 1;
- start = adapter_rom_resources[i++].end & ~(rom_align - 1);
+ start = res->end & ~(rom_align - 1);
+ prev_res = res;
+ res = res->next;
}
}
--
2.7.5

View File

@ -0,0 +1,39 @@
From a4f7290c20c2ff78328c9db0b18029165cfb05b2 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 13:26:08 -0400
Subject: [RHEL7.8 PATCH V2 27/47] super-intel: Fix issue with abs() being
irrelevant
gcc9 complains about subtracting unsigned from unsigned and code
assuming the result can be negative.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 4fd5e84..230e164 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2875,7 +2875,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
{
unsigned long long component_size;
unsigned long long dev_size = imsm_dev_size(dev);
- unsigned long long calc_dev_size = 0;
+ long long calc_dev_size = 0;
unsigned int member_disks = imsm_num_data_members(map);
if (member_disks == 0)
@@ -2889,7 +2889,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
* 2048 blocks per each device. If the difference is higher it means
* that array size was expanded and num_data_stripes was not updated.
*/
- if ((unsigned int)abs(calc_dev_size - dev_size) >
+ if (llabs(calc_dev_size - (long long)dev_size) >
(1 << SECT_PER_MB_SHIFT) * member_disks) {
component_size = dev_size / member_disks;
dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
--
2.7.5

View File

@ -0,0 +1,57 @@
From 7039d1f8200b9599b23db5953934fdb43b0442e0 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:15:38 -0400
Subject: [RHEL7.8 PATCH V2 28/47] mdadm.h: Introduced unaligned
{get,put}_unaligned{16,32}()
We need these to avoid gcc9 going all crazy on us.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/mdadm.h b/mdadm.h
index 427cc52..0fa9e1b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -192,6 +192,36 @@ struct dlm_lksb {
#endif /* __KLIBC__ */
/*
+ * Partially stolen from include/linux/unaligned/packed_struct.h
+ */
+struct __una_u16 { __u16 x; } __attribute__ ((packed));
+struct __una_u32 { __u32 x; } __attribute__ ((packed));
+
+static inline __u16 __get_unaligned16(const void *p)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+ return ptr->x;
+}
+
+static inline __u32 __get_unaligned32(const void *p)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+ return ptr->x;
+}
+
+static inline void __put_unaligned16(__u16 val, void *p)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned32(__u32 val, void *p)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *)p;
+ ptr->x = val;
+}
+
+/*
* Check at compile time that something is of a particular type.
* Always evaluates to 1 so you may use it easily in comparisons.
*/
--
2.7.5

View File

@ -0,0 +1,38 @@
From 486720e0c2418e7e2e0a16221f7c42a308622254 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:49:22 -0400
Subject: [RHEL7.8 PATCH V2 29/47] super-intel: Use put_unaligned in split_ull
Shut up some gcc9 errors by using put_unaligned() accessors. Not pretty,
but better than it was.
Also switch to the correct swap macros.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 230e164..d7e8a65 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1165,12 +1165,12 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
static __u32 imsm_min_reserved_sectors(struct intel_super *super);
-static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
+static int split_ull(unsigned long long n, void *lo, void *hi)
{
if (lo == 0 || hi == 0)
return 1;
- *lo = __le32_to_cpu((unsigned)n);
- *hi = __le32_to_cpu((unsigned)(n >> 32));
+ __put_unaligned32(__cpu_to_le32((__u32)n), lo);
+ __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
return 0;
}
--
2.7.5

View File

@ -0,0 +1,345 @@
From b06815989179e0f153e44e4336290e655edce9a1 Mon Sep 17 00:00:00 2001
From: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Date: Wed, 10 Jul 2019 13:38:53 +0200
Subject: [RHEL7.8 PATCH V2 30/47] mdadm: load default sysfs attributes after
assemblation
Added a new type of line to mdadm.conf which allows specifying values of
sysfs attributes for MD devices that should be loaded after the array is
assembled. Each line is interpreted as list of structures containing
sysname of MD device (md126 etc.) and list of sysfs attributes and their
values.
Signed-off-by: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 12 +++--
Incremental.c | 1 +
config.c | 7 ++-
mdadm.conf.5 | 25 ++++++++++
mdadm.h | 3 ++
sysfs.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 202 insertions(+), 4 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 420c7b3..b2e6914 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1063,9 +1063,12 @@ static int start_array(int mdfd,
mddev, okcnt + sparecnt + journalcnt,
okcnt + sparecnt + journalcnt == 1 ? "" : "s");
if (okcnt < (unsigned)content->array.raid_disks)
- fprintf(stderr, " (out of %d)",
+ fprintf(stderr, " (out of %d)\n",
content->array.raid_disks);
- fprintf(stderr, "\n");
+ else {
+ fprintf(stderr, "\n");
+ sysfs_rules_apply(mddev, content);
+ }
}
if (st->ss->validate_container) {
@@ -1139,6 +1142,7 @@ static int start_array(int mdfd,
rv = ioctl(mdfd, RUN_ARRAY, NULL);
reopen_mddev(mdfd); /* drop O_EXCL */
if (rv == 0) {
+ sysfs_rules_apply(mddev, content);
if (c->verbose >= 0) {
pr_err("%s has been started with %d drive%s",
mddev, okcnt, okcnt==1?"":"s");
@@ -2130,10 +2134,12 @@ int assemble_container_content(struct supertype *st, int mdfd,
pr_err("array %s now has %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
- else
+ else {
+ sysfs_rules_apply(chosen_name, content);
pr_err("Started %s with %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
+ }
if (preexist)
fprintf(stderr, " (%d new)", working);
if (expansion)
diff --git a/Incremental.c b/Incremental.c
index d4d3c35..98dbcd9 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -480,6 +480,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
pr_err("container %s now has %d device%s\n",
chosen_name, info.array.working_disks,
info.array.working_disks == 1?"":"s");
+ sysfs_rules_apply(chosen_name, &info);
wait_for(chosen_name, mdfd);
if (st->ss->external)
strcpy(devnm, fd2devnm(mdfd));
diff --git a/config.c b/config.c
index e14eae0..7592b2d 100644
--- a/config.c
+++ b/config.c
@@ -80,7 +80,8 @@ char DefaultAltConfFile[] = CONFFILE2;
char DefaultAltConfDir[] = CONFFILE2 ".d";
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
- Homehost, HomeCluster, AutoMode, Policy, PartPolicy, LTEnd };
+ Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs,
+ LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
@@ -93,6 +94,7 @@ char *keywords[] = {
[AutoMode] = "auto",
[Policy] = "policy",
[PartPolicy]="part-policy",
+ [Sysfs] = "sysfs",
[LTEnd] = NULL
};
@@ -764,6 +766,9 @@ void conf_file(FILE *f)
case PartPolicy:
policyline(line, rule_part);
break;
+ case Sysfs:
+ sysfsline(line);
+ break;
default:
pr_err("Unknown keyword %s\n", line);
}
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 47c962a..27dbab1 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -587,6 +587,26 @@ be based on the domain, but with
appended, when N is the partition number for the partition that was
found.
+.TP
+.B SYSFS
+The SYSFS line lists custom values of MD device's sysfs attributes which will be
+stored in sysfs after the array is assembled. Multiple lines are allowed and each
+line has to contain the uuid or the name of the device to which it relates.
+.RS 4
+.TP
+.B uuid=
+hexadecimal identifier of MD device. This has to match the uuid stored in the
+superblock.
+.TP
+.B name=
+name of the MD device as was given to
+.I mdadm
+when the array was created. It will be ignored if
+.B uuid
+is not empty.
+.TP
+.RS 7
+
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
.br
@@ -657,6 +677,11 @@ CREATE group=system mode=0640 auto=part\-8
HOMEHOST <system>
.br
AUTO +1.x homehost \-all
+.br
+SYSFS name=/dev/md/raid5 group_thread_cnt=4 sync_speed_max=1000000
+.br
+SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4
+sync_speed_max=1000000
.SH SEE ALSO
.BR mdadm (8),
diff --git a/mdadm.h b/mdadm.h
index 0fa9e1b..c36d7fd 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1322,6 +1322,9 @@ void domain_add(struct domainlist **domp, char *domain);
extern void policy_save_path(char *id_path, struct map_ent *array);
extern int policy_check_path(struct mdinfo *disk, struct map_ent *array);
+extern void sysfs_rules_apply(char *devnm, struct mdinfo *dev);
+extern void sysfsline(char *line);
+
#if __GNUC__ < 3
struct stat64;
#endif
diff --git a/sysfs.c b/sysfs.c
index 2dd9ab6..c313781 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -26,9 +26,22 @@
#include "mdadm.h"
#include <dirent.h>
#include <ctype.h>
+#include "dlink.h"
#define MAX_SYSFS_PATH_LEN 120
+struct dev_sysfs_rule {
+ struct dev_sysfs_rule *next;
+ char *devname;
+ int uuid[4];
+ int uuid_set;
+ struct sysfs_entry {
+ struct sysfs_entry *next;
+ char *name;
+ char *value;
+ } *entry;
+};
+
int load_sys(char *path, char *buf, int len)
{
int fd = open(path, O_RDONLY);
@@ -999,3 +1012,148 @@ int sysfs_wait(int fd, int *msec)
}
return n;
}
+
+int sysfs_rules_apply_check(const struct mdinfo *sra,
+ const struct sysfs_entry *ent)
+{
+ /* Check whether parameter is regular file,
+ * exists and is under specified directory.
+ */
+ char fname[MAX_SYSFS_PATH_LEN];
+ char dname[MAX_SYSFS_PATH_LEN];
+ char resolved_path[PATH_MAX];
+ char resolved_dir[PATH_MAX];
+
+ if (sra == NULL || ent == NULL)
+ return -1;
+
+ snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+
+ if (realpath(fname, resolved_path) == NULL ||
+ realpath(dname, resolved_dir) == NULL)
+ return -1;
+
+ if (strncmp(resolved_dir, resolved_path,
+ strnlen(resolved_dir, PATH_MAX)) != 0)
+ return -1;
+
+ return 0;
+}
+
+static struct dev_sysfs_rule *sysfs_rules;
+
+void sysfs_rules_apply(char *devnm, struct mdinfo *dev)
+{
+ struct dev_sysfs_rule *rules = sysfs_rules;
+
+ while (rules) {
+ struct sysfs_entry *ent = rules->entry;
+ int match = 0;
+
+ if (!rules->uuid_set) {
+ if (rules->devname)
+ match = strcmp(devnm, rules->devname) == 0;
+ } else {
+ match = memcmp(dev->uuid, rules->uuid,
+ sizeof(int[4])) == 0;
+ }
+
+ while (match && ent) {
+ if (sysfs_rules_apply_check(dev, ent) < 0)
+ pr_err("SYSFS: failed to write '%s' to '%s'\n",
+ ent->value, ent->name);
+ else
+ sysfs_set_str(dev, NULL, ent->name, ent->value);
+ ent = ent->next;
+ }
+ rules = rules->next;
+ }
+}
+
+static void sysfs_rule_free(struct dev_sysfs_rule *rule)
+{
+ struct sysfs_entry *entry;
+
+ while (rule) {
+ struct dev_sysfs_rule *tmp = rule->next;
+
+ entry = rule->entry;
+ while (entry) {
+ struct sysfs_entry *tmp = entry->next;
+
+ free(entry->name);
+ free(entry->value);
+ free(entry);
+ entry = tmp;
+ }
+
+ if (rule->devname)
+ free(rule->devname);
+ free(rule);
+ rule = tmp;
+ }
+}
+
+void sysfsline(char *line)
+{
+ struct dev_sysfs_rule *sr;
+ char *w;
+
+ sr = xcalloc(1, sizeof(*sr));
+ for (w = dl_next(line); w != line ; w = dl_next(w)) {
+ if (strncasecmp(w, "name=", 5) == 0) {
+ char *devname = w + 5;
+
+ if (strncmp(devname, "/dev/md/", 8) == 0) {
+ if (sr->devname)
+ pr_err("Only give one device per SYSFS line: %s\n",
+ devname);
+ else
+ sr->devname = xstrdup(devname);
+ } else {
+ pr_err("%s is an invalid name for an md device - ignored.\n",
+ devname);
+ }
+ } else if (strncasecmp(w, "uuid=", 5) == 0) {
+ char *uuid = w + 5;
+
+ if (sr->uuid_set) {
+ pr_err("Only give one uuid per SYSFS line: %s\n",
+ uuid);
+ } else {
+ if (parse_uuid(w + 5, sr->uuid) &&
+ memcmp(sr->uuid, uuid_zero,
+ sizeof(int[4])) != 0)
+ sr->uuid_set = 1;
+ else
+ pr_err("Invalid uuid: %s\n", uuid);
+ }
+ } else {
+ struct sysfs_entry *prop;
+
+ char *sep = strchr(w, '=');
+
+ if (sep == NULL || *(sep + 1) == 0) {
+ pr_err("Cannot parse \"%s\" - ignoring.\n", w);
+ continue;
+ }
+
+ prop = xmalloc(sizeof(*prop));
+ prop->value = xstrdup(sep + 1);
+ *sep = 0;
+ prop->name = xstrdup(w);
+ prop->next = sr->entry;
+ sr->entry = prop;
+ }
+ }
+
+ if (!sr->devname && !sr->uuid_set) {
+ pr_err("Device name not found in sysfs config entry - ignoring.\n");
+ sysfs_rule_free(sr);
+ return;
+ }
+
+ sr->next = sysfs_rules;
+ sysfs_rules = sr;
+}
--
2.7.5

View File

@ -0,0 +1,34 @@
From 452dc4d13a012cdcb05088c0dbc699959c4d6c73 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 6 Aug 2019 16:05:23 +0300
Subject: [RHEL7.8 PATCH V2 31/47] mdadm.h: include sysmacros.h unconditionally
musl libc now also requires sys/sysmacros.h for the major/minor macros.
All supported libc implementations carry sys/sysmacros.h, including
diet-libc, klibc, and uclibc-ng.
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index c36d7fd..d61a9ca 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -45,10 +45,8 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <errno.h>
#include <string.h>
#include <syslog.h>
-#ifdef __GLIBC__
/* Newer glibc requires sys/sysmacros.h directly for makedev() */
#include <sys/sysmacros.h>
-#endif
#ifdef __dietlibc__
#include <strings.h>
/* dietlibc has deprecated random and srandom!! */
--
2.7.5

View File

@ -0,0 +1,161 @@
From d11abe4bd5cad39803726ddff1888674e417bda5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:29 +0800
Subject: [RHEL7.8 PATCH V2 32/47] mdadm: add --no-devices to avoid component
devices detail information
When people assemble a md raid device with a large number of
component devices (e.g. 1500 DASD disks), the raid device detail
information generated by 'mdadm --detail --export $devnode' is very
large. It is because the detail information contains information of
all the component disks (even the missing/failed ones).
In such condition, when udev-md-raid-arrays.rules is triggered and
internally calls "mdadm --detail --export $devnode",
the user may observe the systemd error message "invalid message length". It
is because the following on-stack raw message buffer in systemd code
is not big enough,
systemd/src/libudev/libudev-monitor.c
_public_ struct udev_device *udev_monito ...
struct ucred *cred;
union {
struct udev_monitor_netlink_header nlh;
char raw[8192];
} buf;
Even if the size of raw[] is changed from 8KB to a larger size, it may still
not be enough for the detail message of a md raid device with a much larger
number of component devices.
To fix this problem, an extra option '--no-devices' is added (the
original idea is proposed by Neil Brown). When printing detailed
information of a md raid device, if '--no-devices' is specified, then
all component devices information will not be printed, then the output
message size can be restricted to a small number. Even though systemd
only has an 8KB on-stack raw buffer, the md raid array udev rules can work
correctly without the failure message.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 24 ++++++++++++++++--------
ReadMe.c | 1 +
mdadm.c | 4 ++++
mdadm.h | 2 ++
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/Detail.c b/Detail.c
index 20ea03a..ad60434 100644
--- a/Detail.c
+++ b/Detail.c
@@ -56,7 +56,7 @@ int Detail(char *dev, struct context *c)
*/
int fd = open(dev, O_RDONLY);
mdu_array_info_t array;
- mdu_disk_info_t *disks;
+ mdu_disk_info_t *disks = NULL;
int next;
int d;
time_t atime;
@@ -280,7 +280,7 @@ int Detail(char *dev, struct context *c)
}
map_free(map);
}
- if (sra) {
+ if (!c->no_devices && sra) {
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
char *path;
@@ -655,12 +655,17 @@ This is pretty boring
printf("\n\n");
}
- if (array.raid_disks)
- printf(" Number Major Minor RaidDevice State\n");
- else
- printf(" Number Major Minor RaidDevice\n");
+ if (!c->no_devices) {
+ if (array.raid_disks)
+ printf(" Number Major Minor RaidDevice State\n");
+ else
+ printf(" Number Major Minor RaidDevice\n");
+ }
}
- free(info);
+
+ /* if --no_devices specified, not print component devices info */
+ if (c->no_devices)
+ goto skip_devices_state;
for (d = 0; d < max_disks * 2; d++) {
char *dv;
@@ -747,6 +752,8 @@ This is pretty boring
if (!c->brief)
printf("\n");
}
+
+skip_devices_state:
if (spares && c->brief && array.raid_disks)
printf(" spares=%d", spares);
if (c->brief && st && st->sb)
@@ -766,8 +773,9 @@ This is pretty boring
!enough(array.level, array.raid_disks, array.layout, 1, avail))
rv = 2;
- free(disks);
out:
+ free(info);
+ free(disks);
close(fd);
free(subarray);
free(avail);
diff --git a/ReadMe.c b/ReadMe.c
index 12ccf83..eaf1042 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -181,6 +181,7 @@ struct option long_options[] = {
/* For Detail/Examine */
{"brief", 0, 0, Brief},
+ {"no-devices",0, 0, NoDevices},
{"export", 0, 0, 'Y'},
{"sparc2.2", 0, 0, Sparc22},
{"test", 0, 0, 't'},
diff --git a/mdadm.c b/mdadm.c
index 25a1abd..1fb8086 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -159,6 +159,10 @@ int main(int argc, char *argv[])
c.brief = 1;
continue;
+ case NoDevices:
+ c.no_devices = 1;
+ continue;
+
case 'Y': c.export++;
continue;
diff --git a/mdadm.h b/mdadm.h
index d61a9ca..43b07d5 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -440,6 +440,7 @@ enum special_options {
NoSharing,
HelpOptions,
Brief,
+ NoDevices,
ManageOpt,
Add,
AddSpare,
@@ -550,6 +551,7 @@ struct context {
int runstop;
int verbose;
int brief;
+ int no_devices;
int force;
char *homehost;
int require_homehost;
--
2.7.5

View File

@ -0,0 +1,42 @@
From 1a52f1fc0266d438c996789d4addbfac999a6139 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:30 +0800
Subject: [RHEL7.8 PATCH V2 33/47] udev: add --no-devices option for calling
'mdadm --detail'
When creating the symlink of a md raid device, the detailed information of
component disks is unnecessary for the udev-md-raid-arrays.rules rule. For
md raid devices with huge number of component disks (e.g. 1500 DASD
disks), the detail information of component devices can be very large
and exceed udev monitor's on-stack message buffer.
This patch adds '--no-devices' option when calling mdadm by,
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
Now the detailed output won't include component disks information,
and the error message "invalid message length" reported by systemd can
be removed.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index 5b99d58..d391665 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -17,7 +17,7 @@ TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
-IMPORT{program}="BINDIR/mdadm --detail --export $devnode"
+IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
ENV{DEVTYPE}=="disk", ENV{MD_NAME}=="?*", SYMLINK+="disk/by-id/md-name-$env{MD_NAME}", OPTIONS+="string_escape=replace"
ENV{DEVTYPE}=="disk", ENV{MD_UUID}=="?*", SYMLINK+="disk/by-id/md-uuid-$env{MD_UUID}"
ENV{DEVTYPE}=="disk", ENV{MD_DEVNAME}=="?*", SYMLINK+="md/$env{MD_DEVNAME}"
--
2.7.5

View File

@ -0,0 +1,44 @@
From 91c97c5432028875db5f8abeddb5cb5f31902001 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Mon, 15 Jul 2019 09:25:35 +0200
Subject: [RHEL7.8 PATCH V2 34/47] imsm: close removed drive fd.
When member drive fails, managemon prepares metadata update and adds
the drive to disk_mgmt_list with DISK_REMOVE flag. It fills only
minor and major. It is enough to recognize the device later.
Monitor thread while processing this update will remove the drive from
super only if it is a spare. It never removes failed member from
disks list. As a result, it still keeps opened descriptor to
non-existing device.
If the removed drive is not a spare, fill in the fd in the disk_cfg structure
(prepared by managemon) so that monitor will close the fd when freeing it.
Also set this drive's fd to -1 in super to avoid a double close, because
monitor will close the fd (if needed) while replacing the removed drive
in the array.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index d7e8a65..a103a3f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -9200,6 +9200,9 @@ static int add_remove_disk_update(struct intel_super *super)
remove_disk_super(super,
disk_cfg->major,
disk_cfg->minor);
+ } else {
+ disk_cfg->fd = disk->fd;
+ disk->fd = -1;
}
}
/* release allocate disk structure */
--
2.7.5

View File

@ -0,0 +1,46 @@
From fd5b09c9a9107f0393ce194c4aac6e7b8f163e85 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 16 Aug 2019 11:06:17 +0200
Subject: [RHEL7.8 PATCH V2 35/47] mdadm: check value returned by snprintf
against errors
GCC 8 checks possible truncation during snprintf more strictly
than GCC 7, which results in compilation errors. To fix this
problem, a check of the snprintf result against errors has been added.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
sysfs.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/sysfs.c b/sysfs.c
index c313781..2995713 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -1023,12 +1023,20 @@ int sysfs_rules_apply_check(const struct mdinfo *sra,
char dname[MAX_SYSFS_PATH_LEN];
char resolved_path[PATH_MAX];
char resolved_dir[PATH_MAX];
+ int result;
if (sra == NULL || ent == NULL)
return -1;
- snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
- snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+ result = snprintf(dname, MAX_SYSFS_PATH_LEN,
+ "/sys/block/%s/md/", sra->sys_name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
+
+ result = snprintf(fname, MAX_SYSFS_PATH_LEN,
+ "%s/%s", dname, ent->name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
if (realpath(fname, resolved_path) == NULL ||
realpath(dname, resolved_dir) == NULL)
--
2.7.5

View File

@ -0,0 +1,163 @@
From 43ebc9105e9dafe5145b3e801c05da4736bf6e02 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Date: Tue, 3 Sep 2019 16:49:01 -0300
Subject: [RHEL7.8 PATCH V2 36/47] mdadm: Introduce new array state 'broken'
for raid0/linear
Currently if a md raid0/linear array gets one or more members removed while
being mounted, kernel keeps showing state 'clean' in the 'array_state'
sysfs attribute. Despite udev signaling the member device is gone, 'mdadm'
cannot issue the STOP_ARRAY ioctl successfully, given the array is mounted.
Nothing else hints that something is wrong (except that the removed devices
don't show properly in the output of mdadm 'detail' command). There is no
other property to be checked, and if user is not performing reads/writes
to the array, even kernel log is quiet and doesn't give a clue about the
missing member.
This patch is the mdadm counterpart of kernel new array state 'broken'.
The 'broken' state mimics the state 'clean' in every aspect, being useful
only to distinguish if an array has some member missing. All necessary
paths in mdadm were changed to deal with 'broken' state, and in case the
tool runs in a kernel that is not updated, it'll work normally, i.e., it
doesn't require the 'broken' state in order to work.
Also, this patch changes the way the array state is shown in the 'detail'
command (for raid0/linear only) - now it takes the 'array_state' sysfs
attribute into account instead of relying only on the MD_SB_CLEAN flag.
Cc: Jes Sorensen <jes.sorensen@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 14 ++++++++++++--
Monitor.c | 8 ++++++--
maps.c | 1 +
mdadm.h | 1 +
mdmon.h | 2 +-
monitor.c | 4 ++--
6 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/Detail.c b/Detail.c
index ad60434..3e61e37 100644
--- a/Detail.c
+++ b/Detail.c
@@ -81,6 +81,7 @@ int Detail(char *dev, struct context *c)
int external;
int inactive;
int is_container = 0;
+ char *arrayst;
if (fd < 0) {
pr_err("cannot open %s: %s\n",
@@ -485,9 +486,18 @@ int Detail(char *dev, struct context *c)
else
st = ", degraded";
+ if (array.state & (1 << MD_SB_CLEAN)) {
+ if ((array.level == 0) ||
+ (array.level == LEVEL_LINEAR))
+ arrayst = map_num(sysfs_array_states,
+ sra->array_state);
+ else
+ arrayst = "clean";
+ } else
+ arrayst = "active";
+
printf(" State : %s%s%s%s%s%s \n",
- (array.state & (1 << MD_SB_CLEAN)) ?
- "clean" : "active", st,
+ arrayst, st,
(!e || (e->percent < 0 &&
e->percent != RESYNC_PENDING &&
e->percent != RESYNC_DELAYED)) ?
diff --git a/Monitor.c b/Monitor.c
index 036103f..b527165 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1055,8 +1055,11 @@ int Wait(char *dev)
}
}
+/* The state "broken" is used only for RAID0/LINEAR - it's the same as
+ * "clean", but used in case the array has one or more members missing.
+ */
static char *clean_states[] = {
- "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+ "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL };
int WaitClean(char *dev, int verbose)
{
@@ -1116,7 +1119,8 @@ int WaitClean(char *dev, int verbose)
rv = read(state_fd, buf, sizeof(buf));
if (rv < 0)
break;
- if (sysfs_match_word(buf, clean_states) <= 4)
+ if (sysfs_match_word(buf, clean_states) <
+ (int)ARRAY_SIZE(clean_states) - 1)
break;
rv = sysfs_wait(state_fd, &delay);
if (rv < 0 && errno != EINTR)
diff --git a/maps.c b/maps.c
index 02a0474..49b7f2c 100644
--- a/maps.c
+++ b/maps.c
@@ -150,6 +150,7 @@ mapping_t sysfs_array_states[] = {
{ "read-auto", ARRAY_READ_AUTO },
{ "clean", ARRAY_CLEAN },
{ "write-pending", ARRAY_WRITE_PENDING },
+ { "broken", ARRAY_BROKEN },
{ NULL, ARRAY_UNKNOWN_STATE }
};
diff --git a/mdadm.h b/mdadm.h
index 43b07d5..c88ceab 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -373,6 +373,7 @@ struct mdinfo {
ARRAY_ACTIVE,
ARRAY_WRITE_PENDING,
ARRAY_ACTIVE_IDLE,
+ ARRAY_BROKEN,
ARRAY_UNKNOWN_STATE,
} array_state;
struct md_bb bb;
diff --git a/mdmon.h b/mdmon.h
index 818367c..b3d72ac 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -21,7 +21,7 @@
extern const char Name[];
enum array_state { clear, inactive, suspended, readonly, read_auto,
- clean, active, write_pending, active_idle, bad_word};
+ clean, active, write_pending, active_idle, broken, bad_word};
enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
diff --git a/monitor.c b/monitor.c
index 81537ed..e0d3be6 100644
--- a/monitor.c
+++ b/monitor.c
@@ -26,7 +26,7 @@
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto",
- "clean", "active", "write-pending", "active-idle", NULL };
+ "clean", "active", "write-pending", "active-idle", "broken", NULL };
static char *sync_actions[] = {
"idle", "reshape", "resync", "recover", "check", "repair", NULL
};
@@ -476,7 +476,7 @@ static int read_and_act(struct active_array *a, fd_set *fds)
a->next_state = clean;
ret |= ARRAY_DIRTY;
}
- if (a->curr_state == clean) {
+ if ((a->curr_state == clean) || (a->curr_state == broken)) {
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
--
2.7.5

View File

@ -0,0 +1,40 @@
From 2c2d9c48d2daf0d78d20494c3779c0f6dc4bfa75 Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Tue, 24 Sep 2019 11:39:24 -0400
Subject: [RHEL7.8 PATCH V2 37/47] mdadm: force a uuid swap on big endian
The code path for metadata 0.90 calls a common routine,
fname_from_uuid, that uses metadata 1.2. The code expects the member
swapuuid to be set up and usable, but it is only set up when using
metadata 1.2. Since metadata 0.90 does not create and set swapuuid,
the test (st->ss == &super1) ? 1 : st->ss->swapuuid
fails. The swapuuid is set at compile time based on byte order.
For any call based on metadata 0.90 on big endian processors,
the --export uuid will be incorrect.
Signed-Off-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
util.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/util.c b/util.c
index c26cf5f..64dd409 100644
--- a/util.c
+++ b/util.c
@@ -685,8 +685,12 @@ char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
// work, but can't have it set if we want this printout to match
// all the other uuid printouts in super1.c, so we force swapuuid
// to 1 to make our printout match the rest of super1
+#if __BYTE_ORDER == BIG_ENDIAN
+ return __fname_from_uuid(info->uuid, 1, buf, sep);
+#else
return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
st->ss->swapuuid, buf, sep);
+#endif
}
int check_ext2(int fd, char *name)
--
2.7.5

View File

@ -0,0 +1,99 @@
From e53cb968691d9e40d83caf5570da3bb7b83c64e1 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 31 May 2019 10:10:00 +0800
Subject: [RHEL7.8 PATCH V2 38/47] mdadm/md.4: add the descriptions for bitmap
sysfs nodes
The sysfs nodes under bitmap are not recorded in md.4,
add them based on md.rst and kernel source code.
Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
md.4 | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/md.4 b/md.4
index 3a1d677..e86707a 100644
--- a/md.4
+++ b/md.4
@@ -1101,6 +1101,75 @@ stripe that requires some "prereading". For fairness this defaults to
maximizes sequential-write throughput at the cost of fairness to threads
doing small or random writes.
+.TP
+.B md/bitmap/backlog
+The value stored in the file only has any effect on RAID1 when write-mostly
+devices are active, and write requests to those devices proceed in the
+background.
+
+This variable sets a limit on the number of concurrent background writes,
+the valid values are 0 to 16383, 0 means that write-behind is not allowed,
+while any other number means it can happen. If there are more write requests
+than the number, new writes will be synchronous.
+
+.TP
+.B md/bitmap/can_clear
+This is for externally managed bitmaps, where the kernel writes the bitmap
+itself, but metadata describing the bitmap is managed by mdmon or similar.
+
+When the array is degraded, bits mustn't be cleared. When the array becomes
+optimal again, bits can be cleared, but first the metadata needs to record
+the current event count. So md sets this to 'false' and notifies mdmon,
+then mdmon updates the metadata and writes 'true'.
+
+There is no code in mdmon to actually do this, so maybe it doesn't even
+work.
+
+.TP
+.B md/bitmap/chunksize
+The bitmap chunksize can only be changed when no bitmap is active, and
+the value should be a power of 2 and at least 512.
+
+.TP
+.B md/bitmap/location
+This indicates where the write-intent bitmap for the array is stored.
+It can be "none" or "file" or a signed offset from the array metadata
+- measured in sectors. You cannot set a file by writing here - that can
+only be done with the SET_BITMAP_FILE ioctl.
+
+Writing 'none' to 'bitmap/location' will clear the bitmap, and the previous
+location value must be written to it to restore the bitmap.
+
+.TP
+.B md/bitmap/max_backlog_used
+This keeps track of the maximum number of concurrent write-behind requests
+for an md array, writing any value to this file will clear it.
+
+.TP
+.B md/bitmap/metadata
+This can be 'internal' or 'clustered' or 'external'. 'internal' is set
+by default, which means the metadata for bitmap is stored in the first 256
+bytes of the bitmap space. 'clustered' means separate bitmap metadata are
+used for each cluster node. 'external' means that bitmap metadata is managed
+externally to the kernel.
+
+.TP
+.B md/bitmap/space
+This shows the space (in sectors) which is available at md/bitmap/location,
+and allows the kernel to know when it is safe to resize the bitmap to match
+a resized array. It should be big enough to contain the total bytes in the bitmap.
+
+For 1.0 metadata, assume we can use up to the superblock if before, else
+to 4K beyond superblock. For other metadata versions, assume no change is
+possible.
+
+.TP
+.B md/bitmap/time_base
+This shows the time (in seconds) between disk flushes, and is used when looking
+for bits in the bitmap to be cleared.
+
+The default value is 5 seconds, and it should be an unsigned long value.
+
.SS KERNEL PARAMETERS
The md driver recognised several different kernel parameters.
--
2.7.5

View File

@ -0,0 +1,35 @@
From 8063fd0f9e8abd718bd65928c19bc607cee5acd8 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:47:59 +0800
Subject: [RHEL7.8 PATCH V2 39/47] Init devlist as an array
devlist is a string. It only changes to an array if there is a disk that
is an sbd disk: if one device is sbd, the script runs devlist=().
That line of code changes devlist from a string to an array. If there is
no sbd device, that line is never run, so devlist is still a string.
The later code needs an array rather than a string, so init devlist
as an array to fix this problem.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/func.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clustermd_tests/func.sh b/clustermd_tests/func.sh
index 642cc96..801d604 100644
--- a/clustermd_tests/func.sh
+++ b/clustermd_tests/func.sh
@@ -39,6 +39,9 @@ fetch_devlist()
devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
fi
# sbd disk cannot use in testing
+ # Init devlist as an array
+ i=''
+ devlist=(${devlist[@]#$i})
for i in ${devlist[@]}
do
sbd -d $i dump &> /dev/null
--
2.7.5

View File

@ -0,0 +1,31 @@
From 611093148574164fcf4f24f8c076d09473f655d7 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:48:00 +0800
Subject: [RHEL7.8 PATCH V2 40/47] Don't need to check recovery after re-add
when no I/O writes to raid
If there is no write I/O between removing a member disk and re-adding it, there is no
recovery after re-adding the member disk.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/02r1_Manage_re-add | 2 --
1 file changed, 2 deletions(-)
diff --git a/clustermd_tests/02r1_Manage_re-add b/clustermd_tests/02r1_Manage_re-add
index dd9c416..d0d13e5 100644
--- a/clustermd_tests/02r1_Manage_re-add
+++ b/clustermd_tests/02r1_Manage_re-add
@@ -9,8 +9,6 @@ check all state UU
check all dmesg
mdadm --manage $md0 --fail $dev0 --remove $dev0
mdadm --manage $md0 --re-add $dev0
-check $NODE1 recovery
-check all wait
check all state UU
check all dmesg
stop_md all $md0
--
2.7.5

View File

@ -0,0 +1,47 @@
From 7bd59e7926c6921121087eb067befaa896c900a4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 18 Sep 2019 15:12:55 +1000
Subject: [RHEL7.8 PATCH V2 41/47] udev: allow for udev attribute reading bug.
There is a bug in udev (which will hopefully get fixed, but
we should allow for it anyway).
When reading a sysfs attribute, it first reads the whole
value of the attribute, then reads again expecting to get
a read of 0 bytes, like you would with an ordinary file.
If the sysfs attribute changed between these two reads, it can
get a mixture of two values.
In particular, if it reads when 'array_state' is changing from
'clear' to 'inactive', it can find the value as "clear\nve".
This causes the test for "|clear|active" to fail, so systemd is allowed
to think that the array is ready - when it isn't.
So change the pattern to allow for this by adding a wildcard at
the end.
Also don't allow for an empty string - reading array_state will
never return an empty string - if it exists at all, it will be
non-empty.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index d391665..c8fa8e8 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -14,7 +14,7 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
# never leave state 'inactive'
ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
-ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
--
2.7.5

View File

@ -0,0 +1,40 @@
From b6180160f78f0182b296bdceed6419b26a6fccc7 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Fri, 4 Oct 2019 12:07:28 +0200
Subject: [RHEL7.8 PATCH V2 42/47] imsm: save current_vol number
The imsm container_content routine sets the current_vol index in super
to get volume information. This index was never restored to its
original value, although other functions may rely on it later.
Restore the index to its original value.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index a103a3f..e02bbd7 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7826,6 +7826,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
int sb_errors = 0;
struct dl *d;
int spare_disks = 0;
+ int current_vol = super->current_vol;
/* do not assemble arrays when not all attributes are supported */
if (imsm_check_attributes(mpb->attributes) == 0) {
@@ -7993,6 +7994,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
rest = this;
}
+ super->current_vol = current_vol;
return rest;
}
--
2.7.5

View File

@ -0,0 +1,50 @@
From 1a1ced1e2e64a6b4b349a3fb559f6b39e4cf7103 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 8 Nov 2019 11:59:11 +0100
Subject: [RHEL7.8 PATCH V2 43/47] imsm: allow to specify second volume size
Removed the checks which limited the second volume size to the max value
(the largest size that fits on all current drives). It is now permitted
to create a second volume with a size lower than the maximum possible.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index e02bbd7..713058c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7298,11 +7298,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
maxsize = merge_extents(super, i);
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
if (maxsize < size || maxsize == 0) {
if (verbose) {
@@ -7393,11 +7390,8 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
}
maxsize = size;
}
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
cnt = 0;
for (dl = super->disks; dl; dl = dl->next)
if (dl->e)
--
2.7.5

View File

@ -0,0 +1,45 @@
From 6636788aaf4ec0cacaefb6e77592e4a68e70a957 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 44/47] mdcheck: when mdcheck_start is enabled,
enable mdcheck_continue too.
mdcheck_continue continues a regular array scan that was started by
mdcheck_start.
mdcheck_start will ensure that mdcheck_continue is active.
However, if you reboot after a check has started, but before it finishes,
then mdcheck_continue won't cause it to continue, because nothing
starts it on boot.
So add an install option for mdcheck_continue, and make sure it
gets enabled when mdcheck_start is enabled.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.timer | 2 ++
systemd/mdcheck_start.timer | 1 +
2 files changed, 3 insertions(+)
diff --git a/systemd/mdcheck_continue.timer b/systemd/mdcheck_continue.timer
index 3ccfd78..dba1074 100644
--- a/systemd/mdcheck_continue.timer
+++ b/systemd/mdcheck_continue.timer
@@ -11,3 +11,5 @@ Description=MD array scrubbing - continuation
[Timer]
OnCalendar= 1:05:00
+[Install]
+WantedBy= mdmonitor.service
diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer
index 6480736..9e7e02a 100644
--- a/systemd/mdcheck_start.timer
+++ b/systemd/mdcheck_start.timer
@@ -13,3 +13,4 @@ OnCalendar=Sun *-*-1..7 1:00:00
[Install]
WantedBy= mdmonitor.service
+Also= mdcheck_continue.timer
--
2.7.5

View File

@ -0,0 +1,51 @@
From 4ca799c581703d4d0ad840833c037c2fff088ca7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 45/47] mdcheck: use ${} to pass variable to mdcheck
$MDADM_CHECK_DURATION allows the value to be split on spaces.
${MDADM_CHECK_DURATION} avoids such splitting.
Making this change removes the need for double quoting when setting
the default Environment, and means that double quoting isn't needed
in the EnvironmentFile.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.service | 5 ++---
systemd/mdcheck_start.service | 4 ++--
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index 592c607..deac695 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,8 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION='"6 hours"'
+Environment= MDADM_CHECK_DURATION="6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
-ExecStart=/usr/share/mdadm/mdcheck --continue --duration $MDADM_CHECK_DURATION
-
+ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index 812141b..f17f1aa 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION='"6 hours"'
+Environment= MDADM_CHECK_DURATION="6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
-ExecStart=/usr/share/mdadm/mdcheck --duration $MDADM_CHECK_DURATION
+ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
--
2.7.5

View File

@ -0,0 +1,29 @@
From 85b83a7920bca5b93d2458f093f2c640a130614c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 46/47] SUSE-mdadm_env.sh: handle
MDADM_CHECK_DURATION
The suse sysconfig/mdadm allows MDADM_CHECK_DURATION
to be set, but it is currently ignored.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/SUSE-mdadm_env.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/systemd/SUSE-mdadm_env.sh b/systemd/SUSE-mdadm_env.sh
index 10b2e74..c13b48a 100644
--- a/systemd/SUSE-mdadm_env.sh
+++ b/systemd/SUSE-mdadm_env.sh
@@ -43,3 +43,6 @@ fi
mkdir -p /run/sysconfig
echo "MDADM_MONITOR_ARGS=$MDADM_RAIDDEVICES $MDADM_DELAY $MDADM_MAIL $MDADM_PROGRAM $MDADM_SCAN $MDADM_SEND_MAIL $MDADM_CONFIG" > /run/sysconfig/mdadm
+if [ -n "$MDADM_CHECK_DURATION" ]; then
+ echo "MDADM_CHECK_DURATION=$MDADM_CHECK_DURATION" >> /run/sysconfig/mdadm
+fi
--
2.7.5

View File

@ -0,0 +1,122 @@
From 761e3bd9f5e3aafa95ad3ae50a637dc67c8774f0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Thu, 31 Oct 2019 15:15:38 +1100
Subject: [RHEL7.8 PATCH V2 47/47] super-intel: don't mark structs 'packed'
unnecessarily
super-intel marks a number of structures 'packed', but this
doesn't change the layout - they are already well organized.
This is a problem as gcc warns when code takes the address
of a field in a packed struct - as super-intel sometimes does.
So remove the marking where it isn't needed.
To ensure this does not introduce a regression, add a compile-time
assertion that the size of the structure is exactly the value
it had before the 'packed' notation was removed.
Note that a couple of structures do need to be packed.
As the address of their fields is never taken, that is safe.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 32 ++++++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 713058c..a7fbed4 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -96,6 +96,19 @@
* mutliple PPL area
*/
+/*
+ * This macro lets us ensure that no-one accidentally
+ * changes the size of a struct
+ */
+#define ASSERT_SIZE(_struct, size) \
+static inline void __assert_size_##_struct(void) \
+{ \
+ switch (0) { \
+ case 0: break; \
+ case (sizeof(struct _struct) == size): break; \
+ } \
+}
+
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
@@ -112,6 +125,7 @@ struct imsm_disk {
#define IMSM_DISK_FILLERS 3
__u32 filler[IMSM_DISK_FILLERS]; /* 0xF5 - 0x107 MPB_DISK_FILLERS for future expansion */
};
+ASSERT_SIZE(imsm_disk, 48)
/* map selector for map managment
*/
@@ -146,7 +160,8 @@ struct imsm_map {
__u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
* top byte contains some flags
*/
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_map, 52)
struct imsm_vol {
__u32 curr_migr_unit;
@@ -169,7 +184,8 @@ struct imsm_vol {
__u32 filler[4];
struct imsm_map map[1];
/* here comes another one if migr_state */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_vol, 84)
struct imsm_dev {
__u8 volume[MAX_RAID_SERIAL_LEN];
@@ -220,7 +236,8 @@ struct imsm_dev {
#define IMSM_DEV_FILLERS 3
__u32 filler[IMSM_DEV_FILLERS];
struct imsm_vol vol;
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_dev, 164)
struct imsm_super {
__u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
@@ -248,7 +265,8 @@ struct imsm_super {
struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
/* here comes imsm_dev[num_raid_devs] */
/* here comes BBM logs */
-} __attribute__ ((packed));
+};
+ASSERT_SIZE(imsm_super, 264)
#define BBM_LOG_MAX_ENTRIES 254
#define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
@@ -269,7 +287,8 @@ struct bbm_log {
__u32 signature; /* 0xABADB10C */
__u32 entry_count;
struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
-} __attribute__ ((__packed__));
+};
+ASSERT_SIZE(bbm_log, 2040)
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
@@ -323,7 +342,8 @@ struct migr_record {
* destination - high order 32 bits */
__u32 num_migr_units_hi; /* Total num migration units-of-op
* high order 32 bits */
-} __attribute__ ((__packed__));
+};
+ASSERT_SIZE(migr_record, 64)
struct md_list {
/* usage marker:
--
2.7.5

View File

@ -0,0 +1,45 @@
From e1512e7b7d060f0346738b237ea34eac21b29a26 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Wed, 18 Dec 2019 14:46:21 +0800
Subject: [RHEL8.2 PATCH 1/1] mdcheck service can't start succesfully because
of syntax error
An error is reported when starting the mdcheck_start and mdcheck_continue services:
Invalid environment assignment, ignoring: MDADM_CHECK_DURATION="6 hours"
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.service | 2 +-
systemd/mdcheck_start.service | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index deac695..aa02dde 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,7 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION="6 hours"
+Environment= "MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index f17f1aa..da62d5f 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= MDADM_CHECK_DURATION="6 hours"
+Environment= "MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
--
2.7.5

View File

@ -0,0 +1,41 @@
From 02af379337c73e751ad97c0fed9123121f8b4289 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Wed, 27 Nov 2019 10:19:54 -0500
Subject: [RHEL8.2 PATCH 49/61] Remove last traces of HOT_ADD_DISK
This ioctl is no longer used, so remove all references to it.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 2 --
md_u.h | 1 -
2 files changed, 3 deletions(-)
diff --git a/Manage.c b/Manage.c
index ffe55f8..deeba2b 100644
--- a/Manage.c
+++ b/Manage.c
@@ -1289,8 +1289,6 @@ int Manage_subdevs(char *devname, int fd,
/* Do something to each dev.
* devmode can be
* 'a' - add the device
- * try HOT_ADD_DISK
- * If that fails EINVAL, try ADD_NEW_DISK
* 'S' - add the device as a spare - don't try re-add
* 'j' - add the device as a journal device
* 'A' - re-add the device
diff --git a/md_u.h b/md_u.h
index 2d66d52..b30893c 100644
--- a/md_u.h
+++ b/md_u.h
@@ -28,7 +28,6 @@
#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
-#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int)
--
2.7.5

View File

@ -0,0 +1,51 @@
From 9cf361f8791d86aaced821c19af556819bc03732 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Wed, 27 Nov 2019 11:33:15 -0500
Subject: [RHEL8.2 PATCH 50/61] Fix up a few formatting issues
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/Manage.c b/Manage.c
index deeba2b..b22c396 100644
--- a/Manage.c
+++ b/Manage.c
@@ -1728,8 +1728,10 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
int fd2 = open(from_devname, O_RDONLY);
if (fd1 < 0 || fd2 < 0) {
- if (fd1>=0) close(fd1);
- if (fd2>=0) close(fd2);
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
return 0;
}
@@ -1743,7 +1745,8 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
devlist.disposition = 'r';
if (Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0) == 0) {
devlist.disposition = 'a';
- if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0, NULL, 0) == 0) {
+ if (Manage_subdevs(to_devname, fd1, &devlist, -1, 0,
+ NULL, 0) == 0) {
/* make sure manager is aware of changes */
ping_manager(to_devname);
ping_manager(from_devname);
@@ -1751,7 +1754,9 @@ int move_spare(char *from_devname, char *to_devname, dev_t devid)
close(fd2);
return 1;
}
- else Manage_subdevs(from_devname, fd2, &devlist, -1, 0, NULL, 0);
+ else
+ Manage_subdevs(from_devname, fd2, &devlist,
+ -1, 0, NULL, 0);
}
close(fd1);
close(fd2);
--
2.7.5

View File

@ -0,0 +1,26 @@
From 4b31846f3f90aa24f883ceed80e91f204c0a9389 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Fri, 29 Nov 2019 17:14:47 +0800
Subject: [RHEL8.2 PATCH 51/61] Remove unused code
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
platform-intel.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/platform-intel.h b/platform-intel.h
index 29c85f1..7cb370e 100644
--- a/platform-intel.h
+++ b/platform-intel.h
@@ -169,7 +169,6 @@ static inline int fls(int x)
r -= 2;
}
if (!(x & 0x80000000u)) {
- x <<= 1;
r -= 1;
}
return r;
--
2.7.5

View File

@ -0,0 +1,176 @@
From b771faef931c798a4553db0a8c1366aff90079c6 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 29 Nov 2019 15:21:08 +0100
Subject: [RHEL8.2 PATCH 52/61] imsm: return correct uuid for volume in detail
Fixes the side effect of the patch b6180160f ("imsm: save current_vol number")
- wrong UUID is printed in detail for each volume.
New parameter "subarray" is added to determine what info should be extracted
from metadata (subarray or container).
The parameter affects only IMSM metadata.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 4 ++--
mdadm.h | 5 +++--
super-ddf.c | 5 +++--
super-intel.c | 20 ++++++++++++++++++--
super0.c | 4 ++--
super1.c | 4 ++--
6 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/Detail.c b/Detail.c
index 3e61e37..24fa462 100644
--- a/Detail.c
+++ b/Detail.c
@@ -623,7 +623,7 @@ This is pretty boring
free_mdstat(ms);
if (st && st->sb)
- st->ss->detail_super(st, c->homehost);
+ st->ss->detail_super(st, c->homehost, subarray);
if (array.raid_disks == 0 && sra &&
sra->array.major_version == -1 &&
@@ -767,7 +767,7 @@ skip_devices_state:
if (spares && c->brief && array.raid_disks)
printf(" spares=%d", spares);
if (c->brief && st && st->sb)
- st->ss->brief_detail_super(st);
+ st->ss->brief_detail_super(st, subarray);
if (st)
st->ss->free_super(st);
diff --git a/mdadm.h b/mdadm.h
index c88ceab..91f1338 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -847,8 +847,9 @@ extern struct superswitch {
/* Used to report details of an active array.
* ->load_super was possibly given a 'component' string.
*/
- void (*detail_super)(struct supertype *st, char *homehost);
- void (*brief_detail_super)(struct supertype *st);
+ void (*detail_super)(struct supertype *st, char *homehost,
+ char *subarray);
+ void (*brief_detail_super)(struct supertype *st, char *subarray);
void (*export_detail_super)(struct supertype *st);
/* Optional: platform hardware / firmware details */
diff --git a/super-ddf.c b/super-ddf.c
index c095e8a..7802063 100644
--- a/super-ddf.c
+++ b/super-ddf.c
@@ -1730,7 +1730,8 @@ err:
return 1;
}
-static void detail_super_ddf(struct supertype *st, char *homehost)
+static void detail_super_ddf(struct supertype *st, char *homehost,
+ char *subarray)
{
struct ddf_super *sb = st->sb;
int cnt = be16_to_cpu(sb->virt->populated_vdes);
@@ -1787,7 +1788,7 @@ static void uuid_of_ddf_subarray(const struct ddf_super *ddf,
memcpy(uuid, sha, 4*4);
}
-static void brief_detail_super_ddf(struct supertype *st)
+static void brief_detail_super_ddf(struct supertype *st, char *subarray)
{
struct mdinfo info;
char nbuf[64];
diff --git a/super-intel.c b/super-intel.c
index a7fbed4..86dcb69 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2183,23 +2183,39 @@ err:
return 1;
}
-static void detail_super_imsm(struct supertype *st, char *homehost)
+static void detail_super_imsm(struct supertype *st, char *homehost,
+ char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf("\n UUID : %s\n", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
-static void brief_detail_super_imsm(struct supertype *st)
+static void brief_detail_super_imsm(struct supertype *st, char *subarray)
{
struct mdinfo info;
char nbuf[64];
+ struct intel_super *super = st->sb;
+ int temp_vol = super->current_vol;
+
+ if (subarray)
+ super->current_vol = strtoul(subarray, NULL, 10);
+
getinfo_super_imsm(st, &info, NULL);
fname_from_uuid(st, &info, nbuf, ':');
printf(" UUID=%s", nbuf + 5);
+
+ super->current_vol = temp_vol;
}
static int imsm_read_serial(int fd, char *devname, __u8 *serial);
diff --git a/super0.c b/super0.c
index 42989b9..6b7c0e3 100644
--- a/super0.c
+++ b/super0.c
@@ -348,7 +348,7 @@ err:
return 1;
}
-static void detail_super0(struct supertype *st, char *homehost)
+static void detail_super0(struct supertype *st, char *homehost, char *subarray)
{
mdp_super_t *sb = st->sb;
printf(" UUID : ");
@@ -368,7 +368,7 @@ static void detail_super0(struct supertype *st, char *homehost)
printf("\n Events : %d.%d\n\n", sb->events_hi, sb->events_lo);
}
-static void brief_detail_super0(struct supertype *st)
+static void brief_detail_super0(struct supertype *st, char *subarray)
{
mdp_super_t *sb = st->sb;
printf(" UUID=");
diff --git a/super1.c b/super1.c
index b85dc20..929466d 100644
--- a/super1.c
+++ b/super1.c
@@ -833,7 +833,7 @@ err:
return 1;
}
-static void detail_super1(struct supertype *st, char *homehost)
+static void detail_super1(struct supertype *st, char *homehost, char *subarray)
{
struct mdp_superblock_1 *sb = st->sb;
bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
@@ -857,7 +857,7 @@ static void detail_super1(struct supertype *st, char *homehost)
(unsigned long long)__le64_to_cpu(sb->events));
}
-static void brief_detail_super1(struct supertype *st)
+static void brief_detail_super1(struct supertype *st, char *subarray)
{
struct mdp_superblock_1 *sb = st->sb;
int i;
--
2.7.5

View File

@ -0,0 +1,208 @@
From 6da53c0e2aab200605722795798b1e4f2352cd64 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Mon, 2 Dec 2019 10:52:05 +0100
Subject: [RHEL8.2 PATCH 53/61] imsm: Change the way of printing nvme drives in
detail-platform.
Change NVMe controller path to device node path
in mdadm --detail-platform and print serial number.
The method imsm_read_serial always trims the serial to
MAX_RAID_SERIAL_LEN. The added parameter 'serial_buf_len'
is used to check whether the serial fits in
the passed buffer; if not, it is trimmed.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 97 ++++++++++++++++++++++++++++-------------------------------
1 file changed, 46 insertions(+), 51 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 86dcb69..5c1f759 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2218,7 +2218,8 @@ static void brief_detail_super_imsm(struct supertype *st, char *subarray)
super->current_vol = temp_vol;
}
-static int imsm_read_serial(int fd, char *devname, __u8 *serial);
+static int imsm_read_serial(int fd, char *devname, __u8 *serial,
+ size_t serial_buf_len);
static void fd2devname(int fd, char *name);
static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
@@ -2364,8 +2365,9 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
else {
fd2devname(fd, buf);
printf(" Port%d : %s", port, buf);
- if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
- printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
+ if (imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)) == 0)
+ printf(" (%s)\n", buf);
else
printf(" ()\n");
close(fd);
@@ -2388,52 +2390,45 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b
return err;
}
-static int print_vmd_attached_devs(struct sys_dev *hba)
+static int print_nvme_info(struct sys_dev *hba)
{
+ char buf[1024];
struct dirent *ent;
DIR *dir;
- char path[292];
- char link[256];
- char *c, *rp;
-
- if (hba->type != SYS_DEV_VMD)
- return 1;
+ char *rp;
+ int fd;
- /* scroll through /sys/dev/block looking for devices attached to
- * this hba
- */
- dir = opendir("/sys/bus/pci/drivers/nvme");
+ dir = opendir("/sys/block/");
if (!dir)
return 1;
for (ent = readdir(dir); ent; ent = readdir(dir)) {
- int n;
-
- /* is 'ent' a device? check that the 'subsystem' link exists and
- * that its target matches 'bus'
- */
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem",
- ent->d_name);
- n = readlink(path, link, sizeof(link));
- if (n < 0 || n >= (int)sizeof(link))
- continue;
- link[n] = '\0';
- c = strrchr(link, '/');
- if (!c)
- continue;
- if (strncmp("pci", c+1, strlen("pci")) != 0)
- continue;
-
- sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
-
- rp = realpath(path, NULL);
- if (!rp)
- continue;
+ if (strstr(ent->d_name, "nvme")) {
+ sprintf(buf, "/sys/block/%s", ent->d_name);
+ rp = realpath(buf, NULL);
+ if (!rp)
+ continue;
+ if (path_attached_to_hba(rp, hba->path)) {
+ fd = open_dev(ent->d_name);
+ if (fd < 0) {
+ free(rp);
+ continue;
+ }
- if (path_attached_to_hba(rp, hba->path)) {
- printf(" NVMe under VMD : %s\n", rp);
+ fd2devname(fd, buf);
+ if (hba->type == SYS_DEV_VMD)
+ printf(" NVMe under VMD : %s", buf);
+ else if (hba->type == SYS_DEV_NVME)
+ printf(" NVMe Device : %s", buf);
+ if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
+ sizeof(buf)))
+ printf(" (%s)\n", buf);
+ else
+ printf("()\n");
+ close(fd);
+ }
+ free(rp);
}
- free(rp);
}
closedir(dir);
@@ -2648,7 +2643,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
char buf[PATH_MAX];
printf(" I/O Controller : %s (%s)\n",
vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
- if (print_vmd_attached_devs(hba)) {
+ if (print_nvme_info(hba)) {
if (verbose > 0)
pr_err("failed to get devices attached to VMD domain.\n");
result |= 2;
@@ -2663,7 +2658,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle
if (entry->type == SYS_DEV_NVME) {
for (hba = list; hba; hba = hba->next) {
if (hba->type == SYS_DEV_NVME)
- printf(" NVMe Device : %s\n", hba->path);
+ print_nvme_info(hba);
}
printf("\n");
continue;
@@ -4028,11 +4023,11 @@ static int nvme_get_serial(int fd, void *buf, size_t buf_len)
extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
static int imsm_read_serial(int fd, char *devname,
- __u8 serial[MAX_RAID_SERIAL_LEN])
+ __u8 *serial, size_t serial_buf_len)
{
char buf[50];
int rv;
- int len;
+ size_t len;
char *dest;
char *src;
unsigned int i;
@@ -4075,13 +4070,13 @@ static int imsm_read_serial(int fd, char *devname,
len = dest - buf;
dest = buf;
- /* truncate leading characters */
- if (len > MAX_RAID_SERIAL_LEN) {
- dest += len - MAX_RAID_SERIAL_LEN;
- len = MAX_RAID_SERIAL_LEN;
+ if (len > serial_buf_len) {
+ /* truncate leading characters */
+ dest += len - serial_buf_len;
+ len = serial_buf_len;
}
- memset(serial, 0, MAX_RAID_SERIAL_LEN);
+ memset(serial, 0, serial_buf_len);
memcpy(serial, dest, len);
return 0;
@@ -4136,7 +4131,7 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
char name[40];
__u8 serial[MAX_RAID_SERIAL_LEN];
- rv = imsm_read_serial(fd, devname, serial);
+ rv = imsm_read_serial(fd, devname, serial, MAX_RAID_SERIAL_LEN);
if (rv != 0)
return 2;
@@ -5844,7 +5839,7 @@ int mark_spare(struct dl *disk)
return ret_val;
ret_val = 0;
- if (!imsm_read_serial(disk->fd, NULL, serial)) {
+ if (!imsm_read_serial(disk->fd, NULL, serial, MAX_RAID_SERIAL_LEN)) {
/* Restore disk serial number, because takeover marks disk
* as failed and adds to serial ':0' before it becomes
* a spare disk.
@@ -5895,7 +5890,7 @@ static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
dd->fd = fd;
dd->e = NULL;
dd->action = DISK_ADD;
- rv = imsm_read_serial(fd, devname, dd->serial);
+ rv = imsm_read_serial(fd, devname, dd->serial, MAX_RAID_SERIAL_LEN);
if (rv) {
pr_err("failed to retrieve scsi serial, aborting\n");
if (dd->devname)
--
2.7.5

View File

@ -0,0 +1,342 @@
From 329dfc28debb58ffe7bd1967cea00fc583139aca Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 4 Nov 2019 14:27:49 +1100
Subject: [RHEL8.2 PATCH 54/61] Create: add support for RAID0 layouts.
Since Linux 5.4 a layout is needed for RAID0 arrays with
varying device sizes.
This patch makes the layout of an array visible (via --examine)
and sets the layout on newly created arrays.
--layout=dangerous
can be used to avoid setting a layout so that the array
can be used on older kernels.
Tested-by: dann frazier <dann.frazier@canonical.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Create.c | 11 +++++++++++
Detail.c | 5 +++++
maps.c | 12 ++++++++++++
md.4 | 14 ++++++++++++++
mdadm.8.in | 30 +++++++++++++++++++++++++++++-
mdadm.c | 8 ++++++++
mdadm.h | 8 +++++++-
super0.c | 6 ++++++
super1.c | 30 +++++++++++++++++++++++++++++-
9 files changed, 121 insertions(+), 3 deletions(-)
diff --git a/Create.c b/Create.c
index 292f92a..6f84e5b 100644
--- a/Create.c
+++ b/Create.c
@@ -51,6 +51,9 @@ static int default_layout(struct supertype *st, int level, int verbose)
default: /* no layout */
layout = 0;
break;
+ case 0:
+ layout = RAID0_ORIG_LAYOUT;
+ break;
case 10:
layout = 0x102; /* near=2, far=1 */
if (verbose > 0)
@@ -950,6 +953,11 @@ int Create(struct supertype *st, char *mddev,
if (rv) {
pr_err("ADD_NEW_DISK for %s failed: %s\n",
dv->devname, strerror(errno));
+ if (errno == EINVAL &&
+ info.array.level == 0) {
+ pr_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+ pr_err("Either upgrade, or use --layout=dangerous\n");
+ }
goto abort_locked;
}
break;
@@ -1046,6 +1054,9 @@ int Create(struct supertype *st, char *mddev,
if (ioctl(mdfd, RUN_ARRAY, &param)) {
pr_err("RUN_ARRAY failed: %s\n",
strerror(errno));
+ if (errno == 524 /* ENOTSUP */ &&
+ info.array.level == 0)
+ cont_err("Please use --layout=original or --layout=alternate\n");
if (info.array.chunk_size & (info.array.chunk_size-1)) {
cont_err("Problem may be that chunk size is not a power of 2\n");
}
diff --git a/Detail.c b/Detail.c
index 24fa462..832485f 100644
--- a/Detail.c
+++ b/Detail.c
@@ -525,6 +525,11 @@ int Detail(char *dev, struct context *c)
printf(" Layout : %s\n",
str ? str : "-unknown-");
}
+ if (array.level == 0 && array.layout) {
+ str = map_num(r0layout, array.layout);
+ printf(" Layout : %s\n",
+ str ? str : "-unknown-");
+ }
if (array.level == 6) {
str = map_num(r6layout, array.layout);
printf(" Layout : %s\n",
diff --git a/maps.c b/maps.c
index 49b7f2c..a4fd279 100644
--- a/maps.c
+++ b/maps.c
@@ -73,6 +73,18 @@ mapping_t r6layout[] = {
{ NULL, UnSet }
};
+/* raid0 layout is only needed because of a bug in 3.14 which changed
+ * the effective layout of raid0 arrays with varying device sizes.
+ */
+mapping_t r0layout[] = {
+ { "original", RAID0_ORIG_LAYOUT},
+ { "alternate", RAID0_ALT_MULTIZONE_LAYOUT},
+ { "1", 1}, /* aka ORIG */
+ { "2", 2}, /* aka ALT */
+ { "dangerous", 0},
+ { NULL, UnSet},
+};
+
mapping_t pers[] = {
{ "linear", LEVEL_LINEAR},
{ "raid0", 0},
diff --git a/md.4 b/md.4
index e86707a..6fe2755 100644
--- a/md.4
+++ b/md.4
@@ -193,6 +193,20 @@ smallest device has been exhausted, the RAID0 driver starts
collecting chunks into smaller stripes that only span the drives which
still have remaining space.
+A bug was introduced in linux 3.14 which changed the layout of blocks in
+a RAID0 beyond the region that is striped over all devices. This bug
+does not affect an array with all devices the same size, but can affect
+other RAID0 arrays.
+
+Linux 5.4 (and some stable kernels to which the change was backported)
+will not normally assemble such an array as it cannot know which layout
+to use. There is a module parameter "raid0.default_layout" which can be
+set to "1" to force the kernel to use the pre-3.14 layout or to "2" to
+force it to use the 3.14-and-later layout. when creating a new RAID0
+array,
+.I mdadm
+will record the chosen layout in the metadata in a way that allows newer
+kernels to assemble the array without needing a module parameter.
.SS RAID1
diff --git a/mdadm.8.in b/mdadm.8.in
index 9aec9f4..fc9b6a6 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -593,6 +593,8 @@ to change the RAID level in some cases. See LEVEL CHANGES below.
This option configures the fine details of data layout for RAID5, RAID6,
and RAID10 arrays, and controls the failure modes for
.IR faulty .
+It can also be used for working around a kernel bug with RAID0, but generally
+doesn't need to be used explicitly.
The layout of the RAID5 parity block can be one of
.BR left\-asymmetric ,
@@ -652,7 +654,7 @@ option to set subsequent failure modes.
"clear" or "none" will remove any pending or periodic failure modes,
and "flush" will clear any persistent faults.
-Finally, the layout options for RAID10 are one of 'n', 'o' or 'f' followed
+The layout options for RAID10 are one of 'n', 'o' or 'f' followed
by a small number. The default is 'n2'. The supported options are:
.I 'n'
@@ -677,6 +679,32 @@ devices in the array. It does not need to divide evenly into that
number (e.g. it is perfectly legal to have an 'n2' layout for an array
with an odd number of devices).
+A bug introduced in Linux 3.14 means that RAID0 arrays
+.B "with devices of differing sizes"
+started using a different layout. This could lead to
+data corruption. Since Linux 5.4 (and various stable releases that received
+backports), the kernel will not accept such an array unless
+a layout is explictly set. It can be set to
+.RB ' original '
+or
+.RB ' alternate '.
+When creating a new array,
+.I mdadm
+will select
+.RB ' original '
+by default, so the layout does not normally need to be set.
+An array created for either
+.RB ' original '
+or
+.RB ' alternate '
+will not be recognized by an (unpatched) kernel prior to 5.4. To create
+a RAID0 array with devices of differing sizes that can be used on an
+older kernel, you can set the layout to
+.RB ' dangerous '.
+This will use whichever layout the running kernel supports, so the data
+on the array may become corrupt when changing kernel from pre-3.14 to a
+later kernel.
+
When an array is converted between RAID5 and RAID6 an intermediate
RAID6 layout is used in which the second parity block (Q) is always on
the last device. To convert a RAID5 to RAID6 and leave it in this new
diff --git a/mdadm.c b/mdadm.c
index 1fb8086..e438f9c 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -550,6 +550,14 @@ int main(int argc, char *argv[])
pr_err("raid level must be given before layout.\n");
exit(2);
+ case 0:
+ s.layout = map_name(r0layout, optarg);
+ if (s.layout == UnSet) {
+ pr_err("layout %s not understood for raid0.\n",
+ optarg);
+ exit(2);
+ }
+ break;
case 5:
s.layout = map_name(r5layout, optarg);
if (s.layout == UnSet) {
diff --git a/mdadm.h b/mdadm.h
index 91f1338..9e98778 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -763,7 +763,8 @@ extern int restore_stripes(int *dest, unsigned long long *offsets,
extern char *map_num(mapping_t *map, int num);
extern int map_name(mapping_t *map, char *name);
-extern mapping_t r5layout[], r6layout[], pers[], modes[], faultylayout[];
+extern mapping_t r0layout[], r5layout[], r6layout[],
+ pers[], modes[], faultylayout[];
extern mapping_t consistency_policies[], sysfs_array_states[];
extern char *map_dev_preferred(int major, int minor, int create,
@@ -1758,6 +1759,11 @@ char *xstrdup(const char *str);
#define makedev(M,m) (((M)<<8) | (m))
#endif
+enum r0layout {
+ RAID0_ORIG_LAYOUT = 1,
+ RAID0_ALT_MULTIZONE_LAYOUT = 2,
+};
+
/* for raid4/5/6 */
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
diff --git a/super0.c b/super0.c
index 6b7c0e3..6af140b 100644
--- a/super0.c
+++ b/super0.c
@@ -1291,6 +1291,12 @@ static int validate_geometry0(struct supertype *st, int level,
if (*chunk == UnSet)
*chunk = DEFAULT_CHUNK;
+ if (level == 0 && layout != UnSet) {
+ if (verbose)
+ pr_err("0.90 metadata does not support layouts for RAID0\n");
+ return 0;
+ }
+
if (!subdev)
return 1;
diff --git a/super1.c b/super1.c
index 929466d..cedbb53 100644
--- a/super1.c
+++ b/super1.c
@@ -43,7 +43,7 @@ struct mdp_superblock_1 {
__u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
__u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
- __u32 layout; /* only for raid5 currently */
+ __u32 layout; /* used for raid5, raid6, raid10, and raid0 */
__u64 size; /* used size of component devices, in 512byte sectors */
__u32 chunksize; /* in 512byte sectors */
@@ -144,6 +144,7 @@ struct misc_dev_info {
#define MD_FEATURE_JOURNAL 512 /* support write journal */
#define MD_FEATURE_PPL 1024 /* support PPL */
#define MD_FEATURE_MUTLIPLE_PPLS 2048 /* support for multiple PPLs */
+#define MD_FEATURE_RAID0_LAYOUT 4096 /* layout is meaningful in RAID0 */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -155,6 +156,7 @@ struct misc_dev_info {
|MD_FEATURE_JOURNAL \
|MD_FEATURE_PPL \
|MD_FEATURE_MULTIPLE_PPLS \
+ |MD_FEATURE_RAID0_LAYOUT \
)
static int role_from_sb(struct mdp_superblock_1 *sb)
@@ -498,6 +500,11 @@ static void examine_super1(struct supertype *st, char *homehost)
printf(" Events : %llu\n",
(unsigned long long)__le64_to_cpu(sb->events));
printf("\n");
+ if (__le32_to_cpu(sb->level) == 0 &&
+ (sb->feature_map & __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT))) {
+ c = map_num(r0layout, __le32_to_cpu(sb->layout));
+ printf(" Layout : %s\n", c?c:"-unknown-");
+ }
if (__le32_to_cpu(sb->level) == 5) {
c = map_num(r5layout, __le32_to_cpu(sb->layout));
printf(" Layout : %s\n", c?c:"-unknown-");
@@ -1646,6 +1653,7 @@ struct devinfo {
int fd;
char *devname;
long long data_offset;
+ unsigned long long dev_size;
mdu_disk_info_t disk;
struct devinfo *next;
};
@@ -1687,6 +1695,7 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
di->devname = devname;
di->disk = *dk;
di->data_offset = data_offset;
+ get_dev_size(fd, NULL, &di->dev_size);
di->next = NULL;
*dip = di;
@@ -1888,10 +1897,25 @@ static int write_init_super1(struct supertype *st)
unsigned long long sb_offset;
unsigned long long data_offset;
long bm_offset;
+ int raid0_need_layout = 0;
for (di = st->info; di; di = di->next) {
if (di->disk.state & (1 << MD_DISK_JOURNAL))
sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL);
+ if (sb->level == 0 && sb->layout != 0) {
+ struct devinfo *di2 = st->info;
+ unsigned long long s1, s2;
+ s1 = di->dev_size;
+ if (di->data_offset != INVALID_SECTORS)
+ s1 -= di->data_offset;
+ s1 /= __le32_to_cpu(sb->chunksize);
+ s2 = di2->dev_size;
+ if (di2->data_offset != INVALID_SECTORS)
+ s2 -= di2->data_offset;
+ s2 /= __le32_to_cpu(sb->chunksize);
+ if (s1 != s2)
+ raid0_need_layout = 1;
+ }
}
for (di = st->info; di; di = di->next) {
@@ -2039,6 +2063,10 @@ static int write_init_super1(struct supertype *st)
sb->bblog_offset = 0;
}
+ /* RAID0 needs a layout if devices aren't all the same size */
+ if (raid0_need_layout)
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+
sb->sb_csum = calc_sb_1_csum(sb);
rv = store_super1(st, di->fd);
--
2.7.5

View File

@ -0,0 +1,150 @@
From 027c099fd1a31fb3815e592de75d0791a22353b4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 4 Nov 2019 14:27:49 +1100
Subject: [RHEL8.2 PATCH 55/61] Assemble: add support for RAID0 layouts.
If you have a RAID0 array with varying sized devices
on a kernel before 5.4, you cannot assemble it on
5.4 or later without explicitly setting the layout.
This is now possible with
--update=layout-original (For 3.13 and earlier kernels)
or
--update=layout-alternate (for 3.14 and later kernels)
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 8 ++++++++
md.4 | 7 +++++++
mdadm.8.in | 17 +++++++++++++++++
mdadm.c | 4 ++++
super1.c | 12 +++++++++++-
5 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/Assemble.c b/Assemble.c
index b2e6914..6b5a7c8 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1031,6 +1031,11 @@ static int start_array(int mdfd,
pr_err("failed to add %s to %s: %s\n",
devices[j].devname, mddev,
strerror(errno));
+ if (errno == EINVAL && content->array.level == 0 &&
+ content->array.layout != 0) {
+ cont_err("Possibly your kernel doesn't support RAID0 layouts.\n");
+ cont_err("Please upgrade.\n");
+ }
if (i < content->array.raid_disks * 2 ||
i == bestcnt)
okcnt--;
@@ -1220,6 +1225,9 @@ static int start_array(int mdfd,
return 0;
}
pr_err("failed to RUN_ARRAY %s: %s\n", mddev, strerror(errno));
+ if (errno == 524 /* ENOTSUP */ &&
+ content->array.level == 0 && content->array.layout == 0)
+ cont_err("Please use --update=layout-original or --update=layout-alternate\n");
if (!enough(content->array.level, content->array.raid_disks,
content->array.layout, 1, avail))
diff --git a/md.4 b/md.4
index 6fe2755..0712af2 100644
--- a/md.4
+++ b/md.4
@@ -208,6 +208,13 @@ array,
will record the chosen layout in the metadata in a way that allows newer
kernels to assemble the array without needing a module parameter.
+To assemble an old array on a new kernel without using the module parameter,
+use either the
+.B "--update=layout-original"
+option or the
+.B "--update=layout-alternate"
+option.
+
.SS RAID1
A RAID1 array is also known as a mirrored set (though mirrors tend to
diff --git a/mdadm.8.in b/mdadm.8.in
index fc9b6a6..6b63bb4 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -1213,6 +1213,8 @@ argument given to this flag can be one of
.BR no\-bbl ,
.BR ppl ,
.BR no\-ppl ,
+.BR layout\-original ,
+.BR layout\-alternate ,
.BR metadata ,
or
.BR super\-minor .
@@ -1364,6 +1366,21 @@ The
.B no\-ppl
option will disable PPL in the superblock.
+The
+.B layout\-original
+and
+.B layout\-alternate
+options are for RAID0 arrays in use before Linux 5.4. If the array was being
+used with Linux 3.13 or earlier, then to assemble the array on a new kernel,
+.B \-\-update=layout\-original
+must be given. If the array was created and used with a kernel from Linux 3.14 to
+Linux 5.3, then
+.B \-\-update=layout\-alternate
+must be given. This only needs to be given once. Subsequent assembly of the array
+will happen normally.
+For more information, see
+.IR md (4).
+
.TP
.BR \-\-freeze\-reshape
Option is intended to be used in start-up scripts during initrd boot phase.
diff --git a/mdadm.c b/mdadm.c
index e438f9c..256a97e 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -795,6 +795,9 @@ int main(int argc, char *argv[])
continue;
if (strcmp(c.update, "revert-reshape") == 0)
continue;
+ if (strcmp(c.update, "layout-original") == 0 ||
+ strcmp(c.update, "layout-alternate") == 0)
+ continue;
if (strcmp(c.update, "byteorder") == 0) {
if (ss) {
pr_err("must not set metadata type with --update=byteorder.\n");
@@ -825,6 +828,7 @@ int main(int argc, char *argv[])
" 'summaries', 'homehost', 'home-cluster', 'byteorder', 'devicesize',\n"
" 'no-bitmap', 'metadata', 'revert-reshape'\n"
" 'bbl', 'no-bbl', 'force-no-bbl', 'ppl', 'no-ppl'\n"
+ " 'layout-original', 'layout-alternate'\n"
);
exit(outf == stdout ? 0 : 2);
diff --git a/super1.c b/super1.c
index cedbb53..e0d80be 100644
--- a/super1.c
+++ b/super1.c
@@ -1550,7 +1550,17 @@ static int update_super1(struct supertype *st, struct mdinfo *info,
sb->devflags |= FailFast1;
else if (strcmp(update, "nofailfast") == 0)
sb->devflags &= ~FailFast1;
- else
+ else if (strcmp(update, "layout-original") == 0 ||
+ strcmp(update, "layout-alternate") == 0) {
+ if (__le32_to_cpu(sb->level) != 0) {
+ pr_err("%s: %s only supported for RAID0\n",
+ devname?:"", update);
+ rv = -1;
+ } else {
+ sb->feature_map |= __cpu_to_le32(MD_FEATURE_RAID0_LAYOUT);
+ sb->layout = __cpu_to_le32(update[7] == 'o' ? 1 : 2);
+ }
+ } else
rv = -1;
sb->sb_csum = calc_sb_1_csum(sb);
--
2.7.5

View File

@ -0,0 +1,36 @@
From aced6fc9542077a69b00d05bc9cd66c12fc34950 Mon Sep 17 00:00:00 2001
From: dann frazier <dann.frazier@canonical.com>
Date: Mon, 9 Dec 2019 13:54:13 -0700
Subject: [RHEL8.2 PATCH 56/61] Respect $(CROSS_COMPILE) when $(CC) is the
default
Commit 1180ed5 told make to only respect $(CROSS_COMPILE) when $(CC)
was unset. But that will never be the case, as make provides
a default value for $(CC). Change this logic to respect $(CROSS_COMPILE)
when $(CC) is the default. Patch originally by Helmet Grohne.
Fixes: 1180ed5 ("Makefile: make the CC definition conditional")
Signed-off-by: dann frazier <dann.frazier@canonical.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Makefile | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index dfe00b0..a33319a 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,9 @@ ifdef COVERITY
COVERITY_FLAGS=-include coverity-gcc-hack.h
endif
-CC ?= $(CROSS_COMPILE)gcc
+ifeq ($(origin CC),default)
+CC := $(CROSS_COMPILE)gcc
+endif
CXFLAGS ?= -ggdb
CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter
ifdef WARN_UNUSED
--
2.7.5

View File

@ -0,0 +1,39 @@
From 1a87493014050e3bd94000cd36122c3cadf21270 Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 10 Dec 2019 12:21:21 +0100
Subject: [RHEL8.2 PATCH 57/61] Change warning message
In commit 039b7225e6 ("md: allow creation of mdNNN arrays via
md_mod/parameters/new_array") support for name like mdNNN
was added. Special warning, when kernel is unable to handle
request, was added in commit 7105228e19
("mdadm/mdopen: create new function create_named_array for
writing to new_array"), but it was not adequate enough,
because in this situation mdadm tries to do it in old way.
This commit changes the warning to be more relevant when
creating a RAID container with a "/dev/mdNNN" name and mdadm
falls back to the old approach.
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdopen.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mdopen.c b/mdopen.c
index 98c54e4..245be53 100644
--- a/mdopen.c
+++ b/mdopen.c
@@ -120,7 +120,8 @@ int create_named_array(char *devnm)
close(fd);
}
if (fd < 0 || n != (int)strlen(devnm)) {
- pr_err("Fail create %s when using %s\n", devnm, new_array_file);
+ pr_err("Fail to create %s when using %s, fallback to creation via node\n",
+ devnm, new_array_file);
return 0;
}
--
2.7.5

View File

@ -0,0 +1,52 @@
From 1cc3965d48deb0fb3e0657159c608ffb124643c1 Mon Sep 17 00:00:00 2001
From: Xiao Yang <ice_yangxiao@163.com>
Date: Wed, 27 Nov 2019 11:59:24 +0800
Subject: [RHEL8.2 PATCH 48/61] Manage: Remove the legacy code for md driver
prior to 0.90.03
Previous re-add operation only calls ioctl(HOT_ADD_DISK) for array without
metadata(e.g. mdadm -B/--build) when md driver is less than 0.90.02, but
commit 091e8e6 breaks the logic and current re-add operation can call
ioctl(HOT_ADD_DISK) even if md driver is 0.90.03.
This issue is reproduced by 05r1-re-add-nosuper:
------------------------------------------------
++ die 'resync or recovery is happening!'
++ echo -e '\n\tERROR: resync or recovery is happening! \n'
ERROR: resync or recovery is happening!
------------------------------------------------
Fixes: 091e8e6("Manage: Remove all references to md_get_version()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Xiao Yang <ice_yangxiao@163.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Manage.c | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/Manage.c b/Manage.c
index 21536f5..ffe55f8 100644
--- a/Manage.c
+++ b/Manage.c
@@ -741,18 +741,6 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
" Adding anyway as --force was given.\n",
dv->devname, devname);
}
- if (!tst->ss->external && array->major_version == 0) {
- if (ioctl(fd, HOT_ADD_DISK, rdev)==0) {
- if (verbose >= 0)
- pr_err("hot added %s\n",
- dv->devname);
- return 1;
- }
-
- pr_err("hot add failed for %s: %s\n",
- dv->devname, strerror(errno));
- return -1;
- }
if (array->not_persistent == 0 || tst->ss->external) {
--
2.7.5

View File

@ -0,0 +1,43 @@
From 4431efebabd0dd39f33dc1dd8ada312b8da1c9d8 Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Thu, 16 Jan 2020 09:34:44 +0100
Subject: [RHEL8.2 PATCH 59/61] imsm: Update grow manual.
Update --grow option description in manual, according to
the supported grow operations by IMSM.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index 6b63bb4..ca02a33 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -481,9 +481,7 @@ still be larger than any replacement.
This value can be set with
.B \-\-grow
for RAID level 1/4/5/6 though
-.B CONTAINER
-based arrays such as those with IMSM metadata may not be able to
-support this.
+DDF arrays may not be able to support this.
If the array was created with a size smaller than the currently
active drives, the extra space can be accessed using
.BR \-\-grow .
@@ -2759,9 +2757,7 @@ container format. The number of devices in a container can be
increased - which affects all arrays in the container - or an array
in a container can be converted between levels where those levels are
supported by the container, and the conversion is on of those listed
-above. Resizing arrays in an IMSM container with
-.B "--grow --size"
-is not yet supported.
+above.
.PP
Notes:
--
2.7.5

View File

@ -0,0 +1,192 @@
From 42e641abeb312a91b841f1b1ea73661e4bd5a31c Mon Sep 17 00:00:00 2001
From: Kinga Tanska <kinga.tanska@intel.com>
Date: Tue, 21 Jan 2020 10:38:52 +0100
Subject: [RHEL8.2 PATCH 60/61] Add support for Tebibytes
Adding support for Tebibytes enables displaying the size of
volumes in Tebibytes and Terabytes when they are
bigger than 2048 GiB (or GB).
Signed-off-by: Kinga Tanska <kinga.tanska@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.8.in | 20 ++++++++++----------
util.c | 47 +++++++++++++++++++++++++++++++++--------------
2 files changed, 43 insertions(+), 24 deletions(-)
diff --git a/mdadm.8.in b/mdadm.8.in
index ca02a33..5d00faf 100644
--- a/mdadm.8.in
+++ b/mdadm.8.in
@@ -467,8 +467,8 @@ If this is not specified
size, though if there is a variance among the drives of greater than 1%, a warning is
issued.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
Sometimes a replacement drive can be a little smaller than the
original drives though this should be minimised by IDEMA standards.
@@ -532,8 +532,8 @@ problems the array can be made bigger again with no loss with another
.B "\-\-grow \-\-array\-size="
command.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
A value of
.B max
restores the apparent size of the array to be whatever the real
@@ -551,8 +551,8 @@ This is only meaningful for RAID0, RAID4, RAID5, RAID6, and RAID10.
RAID4, RAID5, RAID6, and RAID10 require the chunk size to be a power
of 2. In any case it must be a multiple of 4KB.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
.TP
.BR \-\-rounding=
@@ -767,8 +767,8 @@ When using an
bitmap, the chunksize defaults to 64Meg, or larger if necessary to
fit the bitmap into the available space.
-A suffix of 'K', 'M' or 'G' can be given to indicate Kilobytes, Megabytes or
-Gigabytes respectively.
+A suffix of 'K', 'M', 'G' or 'T' can be given to indicate Kilobytes,
+Megabytes, Gigabytes or Terabytes respectively.
.TP
.BR \-W ", " \-\-write\-mostly
@@ -857,8 +857,8 @@ an array which was originally created using a different version of
which computed a different offset.
Setting the offset explicitly over-rides the default. The value given
-is in Kilobytes unless a suffix of 'K', 'M' or 'G' is used to explicitly
-indicate Kilobytes, Megabytes or Gigabytes respectively.
+is in Kilobytes unless a suffix of 'K', 'M', 'G' or 'T' is used to explicitly
+indicate Kilobytes, Megabytes, Gigabytes or Terabytes respectively.
Since Linux 3.4,
.B \-\-data\-offset
diff --git a/util.c b/util.c
index 64dd409..07f9dc3 100644
--- a/util.c
+++ b/util.c
@@ -389,7 +389,7 @@ int mdadm_version(char *version)
unsigned long long parse_size(char *size)
{
/* parse 'size' which should be a number optionally
- * followed by 'K', 'M', or 'G'.
+ * followed by 'K', 'M'. 'G' or 'T'.
* Without a suffix, K is assumed.
* Number returned is in sectors (half-K)
* INVALID_SECTORS returned on error.
@@ -411,6 +411,10 @@ unsigned long long parse_size(char *size)
c++;
s *= 1024 * 1024 * 2;
break;
+ case 'T':
+ c++;
+ s *= 1024 * 1024 * 1024 * 2LL;
+ break;
case 's': /* sectors */
c++;
break;
@@ -893,13 +897,14 @@ char *human_size(long long bytes)
{
static char buf[47];
- /* We convert bytes to either centi-M{ega,ibi}bytes or
- * centi-G{igi,ibi}bytes, with appropriate rounding,
- * and then print 1/100th of those as a decimal.
+ /* We convert bytes to either centi-M{ega,ibi}bytes,
+ * centi-G{igi,ibi}bytes or centi-T{era,ebi}bytes
+ * with appropriate rounding, and then print
+ * 1/100th of those as a decimal.
* We allow upto 2048Megabytes before converting to
- * gigabytes, as that shows more precision and isn't
+ * gigabytes and 2048Gigabytes before converting to
+ * terabytes, as that shows more precision and isn't
* too large a number.
- * Terabytes are not yet handled.
*/
if (bytes < 5000*1024)
@@ -909,11 +914,16 @@ char *human_size(long long bytes)
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld MiB %ld.%02ld MB)",
cMiB/100, cMiB % 100, cMB/100, cMB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGiB = (bytes * 200LL / (1LL<<30) +1) / 2;
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), " (%ld.%02ld GiB %ld.%02ld GB)",
cGiB/100, cGiB % 100, cGB/100, cGB % 100);
+ } else {
+ long cTiB = (bytes * 200LL / (1LL<<40) + 1) / 2;
+ long cTB = (bytes / (1000000000000LL / 200LL) + 1) / 2;
+ snprintf(buf, sizeof(buf), " (%ld.%02ld TiB %ld.%02ld TB)",
+ cTiB/100, cTiB % 100, cTB/100, cTB % 100);
}
return buf;
}
@@ -922,13 +932,14 @@ char *human_size_brief(long long bytes, int prefix)
{
static char buf[30];
- /* We convert bytes to either centi-M{ega,ibi}bytes or
- * centi-G{igi,ibi}bytes, with appropriate rounding,
- * and then print 1/100th of those as a decimal.
+ /* We convert bytes to either centi-M{ega,ibi}bytes,
+ * centi-G{igi,ibi}bytes or centi-T{era,ebi}bytes
+ * with appropriate rounding, and then print
+ * 1/100th of those as a decimal.
* We allow upto 2048Megabytes before converting to
- * gigabytes, as that shows more precision and isn't
+ * gigabytes and 2048Gigabytes before converting to
+ * terabytes, as that shows more precision and isn't
* too large a number.
- * Terabytes are not yet handled.
*
* If prefix == IEC, we mean prefixes like kibi,mebi,gibi etc.
* If prefix == JEDEC, we mean prefixes like kilo,mega,giga etc.
@@ -941,10 +952,14 @@ char *human_size_brief(long long bytes, int prefix)
long cMiB = (bytes * 200LL / (1LL<<20) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMiB",
cMiB/100, cMiB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGiB = (bytes * 200LL / (1LL<<30) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGiB",
cGiB/100, cGiB % 100);
+ } else {
+ long cTiB = (bytes * 200LL / (1LL<<40) + 1) / 2;
+ snprintf(buf, sizeof(buf), "%ld.%02ldTiB",
+ cTiB/100, cTiB % 100);
}
}
else if (prefix == JEDEC) {
@@ -952,10 +967,14 @@ char *human_size_brief(long long bytes, int prefix)
long cMB = (bytes / ( 1000000LL / 200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldMB",
cMB/100, cMB % 100);
- } else {
+ } else if (bytes < 2*1024LL*1024LL*1024LL*1024LL) {
long cGB = (bytes / (1000000000LL/200LL ) +1) /2;
snprintf(buf, sizeof(buf), "%ld.%02ldGB",
cGB/100, cGB % 100);
+ } else {
+ long cTB = (bytes / (1000000000000LL / 200LL) + 1) / 2;
+ snprintf(buf, sizeof(buf), "%ld.%02ldTB",
+ cTB/100, cTB % 100);
}
}
else
--
2.7.5

View File

@ -0,0 +1,65 @@
From 1e93d0d15913c3fa6d0de5af3fb5e4e3b3f068da Mon Sep 17 00:00:00 2001
From: Blazej Kucman <blazej.kucman@intel.com>
Date: Fri, 17 Jan 2020 15:24:04 +0100
Subject: [RHEL8.2 PATCH 61/61] imsm: fill working_disks according to metadata.
Imsm tracks as "working_disk" each visible drive.
Assemble routine expects that the value will return count
of active member drives recorded in metadata.
As a side effect "--no-degraded" doesn't work correctly for imsm.
Align this field to others.
Added check, if the option --no-degraded is called with --scan.
Signed-off-by: Blazej Kucman <blazej.kucman@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.c | 9 ++++++---
super-intel.c | 5 ++---
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/mdadm.c b/mdadm.c
index 256a97e..13dc24e 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -1485,9 +1485,12 @@ int main(int argc, char *argv[])
rv = Manage_stop(devlist->devname, mdfd, c.verbose, 0);
break;
case ASSEMBLE:
- if (devs_found == 1 && ident.uuid_set == 0 &&
- ident.super_minor == UnSet && ident.name[0] == 0 &&
- !c.scan ) {
+ if (!c.scan && c.runstop == -1) {
+ pr_err("--no-degraded not meaningful without a --scan assembly.\n");
+ exit(1);
+ } else if (devs_found == 1 && ident.uuid_set == 0 &&
+ ident.super_minor == UnSet && ident.name[0] == 0 &&
+ !c.scan) {
/* Only a device has been given, so get details from config file */
struct mddev_ident *array_ident = conf_get_ident(devlist->devname);
if (array_ident == NULL) {
diff --git a/super-intel.c b/super-intel.c
index 5c1f759..47809bc 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7946,7 +7946,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
skip = 1;
if (!skip && (ord & IMSM_ORD_REBUILD))
recovery_start = 0;
-
+ if (!(ord & IMSM_ORD_REBUILD))
+ this->array.working_disks++;
/*
* if we skip some disks the array will be assmebled degraded;
* reset resync start to avoid a dirty-degraded
@@ -7988,8 +7989,6 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
else
this->array.spare_disks++;
}
- if (info_d->recovery_start == MaxSector)
- this->array.working_disks++;
info_d->events = __le32_to_cpu(mpb->generation_num);
info_d->data_offset = pba_of_lba0(map);
--
2.7.5

View File

@ -0,0 +1,52 @@
commit fd38b8ea80ff8e0317e12d1d70431148ceedd5fd
Author: Xiao Ni <xni@redhat.com>
Date: Tue Feb 11 21:44:15 2020 +0800
Remove the legacy whitespace
The whitespace between Environment= and the true value causes confusion.
To avoid confusing other people in future, remove the whitespace to keep
it a simple, unambiguous syntax
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
diff --git a/systemd/mdcheck_continue.service b/systemd/mdcheck_continue.service
index aa02dde..854317f 100644
--- a/systemd/mdcheck_continue.service
+++ b/systemd/mdcheck_continue.service
@@ -11,7 +11,7 @@ ConditionPathExistsGlob = /var/lib/mdcheck/MD_UUID_*
[Service]
Type=oneshot
-Environment= "MDADM_CHECK_DURATION=6 hours"
+Environment="MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --continue --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdcheck_start.service b/systemd/mdcheck_start.service
index da62d5f..3bb3d13 100644
--- a/systemd/mdcheck_start.service
+++ b/systemd/mdcheck_start.service
@@ -11,7 +11,7 @@ Wants=mdcheck_continue.timer
[Service]
Type=oneshot
-Environment= "MDADM_CHECK_DURATION=6 hours"
+Environment="MDADM_CHECK_DURATION=6 hours"
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=/usr/share/mdadm/mdcheck --duration ${MDADM_CHECK_DURATION}
diff --git a/systemd/mdmonitor-oneshot.service b/systemd/mdmonitor-oneshot.service
index fd469b1..373955a 100644
--- a/systemd/mdmonitor-oneshot.service
+++ b/systemd/mdmonitor-oneshot.service
@@ -9,7 +9,7 @@
Description=Reminder for degraded MD arrays
[Service]
-Environment= MDADM_MONITOR_ARGS=--scan
+Environment=MDADM_MONITOR_ARGS=--scan
EnvironmentFile=-/run/sysconfig/mdadm
ExecStartPre=-/usr/lib/mdadm/mdadm_env.sh
ExecStart=BINDIR/mdadm --monitor --oneshot $MDADM_MONITOR_ARGS

View File

@ -1 +1 @@
d /var/run/mdadm 0710 root root -
d /run/mdadm 0710 root root -

48
SOURCES/mdadm_env.sh Normal file
View File

@ -0,0 +1,48 @@
#!/bin/sh
# Extract configuration from /etc/sysconfig/mdadm and write the
# environment to /run/sysconfig/mdadm to be used by systemd unit files.
#
# Written to /run/sysconfig/mdadm:
#   MDADM_MONITOR_ARGS    the sysconfig settings below, each turned into
#                         the matching command-line option
#   MDADM_CHECK_DURATION  copied through, only when it is set

# Scanning is on unless the sysconfig file overrides it.
MDADM_SCAN="yes"

# Following adapted from /etc/init.d/mdadmd on openSUSE
mdadmd_CONFIG=/etc/sysconfig/mdadm
if test -r "$mdadmd_CONFIG"; then
	. "$mdadmd_CONFIG"
fi

# Every expansion inside [ ] is quoted: an unquoted value containing
# whitespace is split into several words and makes the test itself fail,
# which would silently skip the conversion to an option.
if [ -n "$MDADM_DELAY" ]; then
	MDADM_DELAY="-d $MDADM_DELAY"
fi

if [ -n "$MDADM_MAIL" ]; then
	MDADM_MAIL="-m \"$MDADM_MAIL\""
fi

if [ -n "$MDADM_PROGRAM" ]; then
	MDADM_PROGRAM="-p \"$MDADM_PROGRAM\""
fi

# Only the exact value "yes" enables --scan; anything else disables it.
if [ "$MDADM_SCAN" = "yes" ]; then
	MDADM_SCAN="--scan"
else
	MDADM_SCAN=""
fi

if [ "$MDADM_SEND_MAIL_ON_START" = "yes" ]; then
	MDADM_SEND_MAIL="-t"
else
	MDADM_SEND_MAIL=""
fi

if [ -n "$MDADM_CONFIG" ]; then
	MDADM_CONFIG="-c \"$MDADM_CONFIG\""
fi

mkdir -p /run/sysconfig
# The first write truncates the file, the optional second one appends.
echo "MDADM_MONITOR_ARGS=$MDADM_RAIDDEVICES $MDADM_DELAY $MDADM_MAIL $MDADM_PROGRAM $MDADM_SCAN $MDADM_SEND_MAIL $MDADM_CONFIG" > /run/sysconfig/mdadm
if [ -n "$MDADM_CHECK_DURATION" ]; then
	echo "MDADM_CHECK_DURATION=$MDADM_CHECK_DURATION" >> /run/sysconfig/mdadm
fi

164
SOURCES/mdcheck Normal file
View File

@ -0,0 +1,164 @@
#!/bin/bash
# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Author: Neil Brown
# Email: <neilb@suse.com>
# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation can allow the 'check' to continue.
#
# Options are:
# --continue Don't start new checks, only continue old ones.
# --duration This is passed to "date --date=$duration" to find out
# when to finish
#
# To support '--continue', arrays are identified by UUID and the 'sync_completed'
# value is stored in /var/lib/mdcheck/MD_UUID_$UUID
# Convert a /dev/md name into its /sys/.../md equivalent.
#
# $1      path of a device node (symlinks are followed by "ls -L")
# stdout  whatever "readlink -f" resolves /sys/dev/block/MAJOR:MINOR to
#
# The major and minor numbers are taken from the 5th and 6th fields of
# the long "ls" listing.  The positional parameters of the function are
# overwritten, and the globals "maj" and "min" are set as a side effect.
sysname() {
	# "--" is required: without it an empty listing turns this into a
	# bare "set", which prints every shell variable into our output, and
	# a listing that starts with "-" would be parsed as shell options.
	# The substitution is deliberately unquoted so it splits into fields.
	set -- $(ls -lLd "$1")
	maj=${5%,}	# the major number is printed with a trailing comma
	min=$6
	readlink -f "/sys/dev/block/$maj:$min"
}
# Parse the command line.  getopt rewrites it so that all options come
# first, each option argument is a word of its own, and "--" marks the
# end of the options.  On a usage error getopt has already printed a
# message, so just pass its exit status on.
args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
rv=$?
if [ $rv -ne 0 ]; then exit $rv; fi
# getopt emits shell-quoted words; keep $args quoted so they reach eval
# exactly as written instead of being split and globbed first.
eval set -- "$args"

# Results of the parse:
#   cont     "yes" when only interrupted checks should be continued
#   endtime  epoch seconds at which running checks are paused, or empty
cont=
endtime=
while [ " $1" != " --" ]
do
	case $1 in
	# The short spellings are the ones declared to getopt above.
	-h | --help )
		echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
		echo >&2 ' time-offset must be understood by "date --date"'
		exit 0
		;;
	-c | --continue ) cont=yes ;;
	-d | --duration ) shift; dur=$1
		endtime=$(date --date "$dur" "+%s")
		;;
	esac
	shift
done
# Drop the "--" terminator itself.
shift
# All persistent state lives in /var/lib/mdcheck; make sure it exists.
mkdir -p /var/lib/mdcheck

# Scratch file, named after our PID, used while querying each array.
# It is removed when the script exits and on INT, QUIT and TERM.
tmp=/var/lib/mdcheck/.md-check-$$
trap 'rm -f "$tmp"' EXIT INT QUIT TERM

# Purge checkpoint files that have not been modified for over 180 days.
find /var/lib/mdcheck -type f -name "MD_UUID*" -mtime +180 -exec rm {} \;
# Visit every md device node and start, or resume, a 'check' on each
# array that is currently idle.  Each array that gets started is recorded
# in the numbered variables MD_<n>_fl, MD_<n>_sys and MD_<n>_dev, where
# <n> runs from 1 to $cnt.
cnt=0
for dev in /dev/md?*
do
	# A glob that matched nothing is left as the literal pattern.
	[ -e "$dev" ] || continue

	sys=$(sysname $dev)

	# No sync_action attribute: this array cannot be checked.
	[ -f "$sys/md/sync_action" ] || continue
	# Anything other than 'idle': this array is busy.
	[ "$(cat $sys/md/sync_action)" = 'idle' ] || continue

	# Pull the MD_UUID=... line out of the export and load it as a
	# shell variable; skip the array if there is no such line.
	mdadm --detail --export "$dev" | grep '^MD_UUID=' > $tmp || continue
	. $tmp
	fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"

	if [ -n "$cont" ]
	then
		# Continue mode needs both a UUID and a saved checkpoint;
		# otherwise there is nothing to continue here.
		if [ -z "$MD_UUID" ] || [ ! -f "$fl" ]
		then
			continue
		fi
		start=$(cat "$fl")
		logger -p daemon.info mdcheck continue checking $dev from $start
	else
		start=0
		logger -p daemon.info mdcheck start checking $dev
	fi

	# Remember this array for the monitoring and pause phases.
	cnt=$((cnt + 1))
	printf -v "MD_${cnt}_fl" '%s' "$fl"
	printf -v "MD_${cnt}_sys" '%s' "$sys"
	printf -v "MD_${cnt}_dev" '%s' "$dev"

	# Record the starting point, tell md where to begin, then start.
	echo $start > $fl
	echo $start > $sys/md/sync_min
	echo check > $sys/md/sync_action
done
# Without a time budget there is nothing to supervise: the checks that
# were started are left running and we are done.
[ -n "$endtime" ] || exit 0

# Until the deadline, poll the arrays we started every two minutes and
# save how far each check has got.  Exit early once none is checking.
while [ "$(date +%s)" -lt $endtime ]
do
	any=
	for ((i = 1; i <= cnt; i++))
	do
		fl_var=MD_${i}_fl
		sys_var=MD_${i}_sys
		fl=${!fl_var}
		sys=${!sys_var}

		# An empty slot means this array was already dealt with.
		[ -n "$fl" ] || continue

		if [ "$(cat $sys/md/sync_action)" != 'check' ]
		then
			# No longer checking: forget the array and remove its
			# checkpoint file.
			printf -v "$fl_var" '%s' ''
			rm -f $fl
			continue
		fi

		# Only the first word of sync_completed is kept.
		read a rest < $sys/md/sync_completed
		echo $a > $fl
		any=yes
	done
	[ -n "$any" ] || exit 0
	sleep 120
done
# The time budget is used up and some checks may still be running.
# Stop each of them and leave a checkpoint for a later --continue run.
for ((i = 1; i <= cnt; i++))
do
	fl_var=MD_${i}_fl
	sys_var=MD_${i}_sys
	dev_var=MD_${i}_dev
	fl=${!fl_var}
	sys=${!sys_var}
	dev=${!dev_var}

	# An empty slot means this array was already dealt with.
	[ -n "$fl" ] || continue

	if [ "$(cat $sys/md/sync_action)" != 'check' ]
	then
		# No longer checking: nothing to pause, so drop the
		# checkpoint file.
		printf -v "$fl_var" '%s' ''
		rm -f $fl
		continue
	fi

	# Interrupt the check, then save the value md now reports in
	# sync_min as the checkpoint.
	echo idle > $sys/md/sync_action
	cat $sys/md/sync_min > $fl
	logger -p daemon.info pause checking $dev at $(cat $fl)
done

View File

@ -1,7 +1,7 @@
Summary: The mdadm program controls Linux md devices (software RAID arrays)
Name: mdadm
Version: 4.1
Release: 9%{?dist}
Release: 13%{?dist}
Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.xz
Source1: mdmonitor.init
Source2: raid-check
@ -11,6 +11,8 @@ Source5: mdadm-cron
Source6: mdmonitor.service
Source7: mdadm.conf
Source8: mdadm_event.conf
Source9: mdcheck
Source10: mdadm_env.sh
Patch1: 0001-Assemble-keep-MD_DISK_FAILFAST-and-MD_DISK_WRITEMOST.patch
Patch2: 0002-Document-PART-POLICY-lines.patch
@ -36,6 +38,45 @@ Patch21: 0021-add-missing-units-to-examine.patch
Patch22: 0022-imsm-fix-spare-activation-for-old-matrix-arrays.patch
Patch23: 0023-Create-Block-rounding-size-to-max.patch
Patch24: 0024-udev-Add-udev-rules-to-create-by-partuuid-for-md-dev.patch
Patch25: 0025-mdmon-fix-wrong-array-state-when-disk-fails-during-m.patch
Patch26: 0026-Enable-probe_roms-to-scan-more-than-6-roms.patch
Patch27: 0027-super-intel-Fix-issue-with-abs-being-irrelevant.patch
Patch28: 0028-mdadm.h-Introduced-unaligned-get-put-_unaligned-16-3.patch
Patch29: 0029-super-intel-Use-put_unaligned-in-split_ull.patch
Patch30: 0030-mdadm-load-default-sysfs-attributes-after-assemblati.patch
Patch31: 0031-mdadm.h-include-sysmacros.h-unconditionally.patch
Patch32: 0032-mdadm-add-no-devices-to-avoid-component-devices-deta.patch
Patch33: 0033-udev-add-no-devices-option-for-calling-mdadm-detail.patch
Patch34: 0034-imsm-close-removed-drive-fd.patch
Patch35: 0035-mdadm-check-value-returned-by-snprintf-against-error.patch
Patch36: 0036-mdadm-Introduce-new-array-state-broken-for-raid0-lin.patch
Patch37: 0037-mdadm-force-a-uuid-swap-on-big-endian.patch
Patch38: 0038-mdadm-md.4-add-the-descriptions-for-bitmap-sysfs-nod.patch
Patch39: 0039-Init-devlist-as-an-array.patch
Patch40: 0040-Don-t-need-to-check-recovery-after-re-add-when-no-I-.patch
Patch41: 0041-udev-allow-for-udev-attribute-reading-bug.patch
Patch42: 0042-imsm-save-current_vol-number.patch
Patch43: 0043-imsm-allow-to-specify-second-volume-size.patch
Patch44: 0044-mdcheck-when-mdcheck_start-is-enabled-enable-mdcheck.patch
Patch45: 0045-mdcheck-use-to-pass-variable-to-mdcheck.patch
Patch46: 0046-SUSE-mdadm_env.sh-handle-MDADM_CHECK_DURATION.patch
Patch47: 0047-super-intel-don-t-mark-structs-packed-unnecessarily.patch
Patch48: 0048-mdcheck-service-can-t-start-succesfully-because-of-s.patch
Patch49: 0049-Remove-last-traces-of-HOT_ADD_DISK.patch
Patch50: 0050-Fix-up-a-few-formatting-issues.patch
Patch51: 0051-Remove-unused-code.patch
Patch52: 0052-imsm-return-correct-uuid-for-volume-in-detail.patch
Patch53: 0053-imsm-Change-the-way-of-printing-nvme-drives-in-detai.patch
Patch54: 0054-Create-add-support-for-RAID0-layouts.patch
Patch55: 0055-Assemble-add-support-for-RAID0-layouts.patch
Patch56: 0056-Respect-CROSS_COMPILE-when-CC-is-the-default.patch
Patch57: 0057-Change-warning-message.patch
Patch58: 0058-Manage-Remove-the-legacy-code-for-md-driver-prior-to.patch
Patch59: 0059-imsm-Update-grow-manual.patch
Patch60: 0060-Add-support-for-Tebibytes.patch
Patch61: 0061-imsm-fill-working_disks-according-to-metadata.patch
Patch62: 0062-Remove-the-legacy-whitespace.patch
# RHEL customization patches
Patch97: mdadm-3.3-udev.patch
Patch98: mdadm-2.5.2-static.patch
@ -89,6 +130,44 @@ file can be used to help with some common tasks.
%patch22 -p1 -b .0022
%patch23 -p1 -b .0023
%patch24 -p1 -b .0024
%patch25 -p1 -b .0025
%patch26 -p1 -b .0026
%patch27 -p1 -b .0027
%patch28 -p1 -b .0028
%patch29 -p1 -b .0029
%patch30 -p1 -b .0030
%patch31 -p1 -b .0031
%patch32 -p1 -b .0032
%patch33 -p1 -b .0033
%patch34 -p1 -b .0034
%patch35 -p1 -b .0035
%patch36 -p1 -b .0036
%patch37 -p1 -b .0037
%patch38 -p1 -b .0038
%patch39 -p1 -b .0039
%patch40 -p1 -b .0040
%patch41 -p1 -b .0041
%patch42 -p1 -b .0042
%patch43 -p1 -b .0043
%patch44 -p1 -b .0044
%patch45 -p1 -b .0045
%patch46 -p1 -b .0046
%patch47 -p1 -b .0047
%patch48 -p1 -b .0048
%patch49 -p1 -b .0049
%patch50 -p1 -b .0050
%patch51 -p1 -b .0051
%patch52 -p1 -b .0052
%patch53 -p1 -b .0053
%patch54 -p1 -b .0054
%patch55 -p1 -b .0055
%patch56 -p1 -b .0056
%patch57 -p1 -b .0057
%patch58 -p1 -b .0058
%patch59 -p1 -b .0059
%patch60 -p1 -b .0060
%patch61 -p1 -b .0061
%patch62 -p1 -b .0062
# RHEL customization patches
%patch97 -p1 -b .udev
@ -105,6 +184,10 @@ install -Dp -m 644 %{SOURCE3} %{buildroot}%{_udevrulesdir}/65-md-incremental.rul
install -Dp -m 644 %{SOURCE4} %{buildroot}%{_sysconfdir}/sysconfig/raid-check
install -Dp -m 644 %{SOURCE5} %{buildroot}%{_sysconfdir}/cron.d/raid-check
mkdir -p -m 710 %{buildroot}/var/run/mdadm
mkdir -p -m 700 %{buildroot}/usr/share/mdadm
mkdir -p -m 700 %{buildroot}/usr/lib/mdadm
install -Dp -m 755 %{SOURCE9} %{buildroot}/usr/share/mdadm/mdcheck
install -Dp -m 755 %{SOURCE10} %{buildroot}/usr/lib/mdadm/mdadm_env.sh
# systemd
mkdir -p %{buildroot}%{_unitdir}
@ -152,9 +235,27 @@ rm -rf %{buildroot}
%dir %{_localstatedir}/run/%{name}/
%config(noreplace) %{_tmpfilesdir}/%{name}.conf
/etc/libreport/events.d/*
/usr/share/mdadm/mdcheck
/usr/lib/mdadm/mdadm_env.sh
%changelog
* Thu Jul 10 2019 Xiao Ni <xni@redhat.com> - 4.1.9
* Fri Feb 28 2020 Xiao Ni <xni@redhat.com> - 4.1.13
- Remove the unnecessary whitespace in .service file
- Resolves rhbz#1803470
* Tue Feb 11 2020 Xiao Ni <xni@redhat.com> - 4.1.12
- Update mdadm to latest upstream && change tmpfiles directory && correct changelog date
- Resolves rhbz#1800521 and rhbz#1657265
* Sun Feb 09 2020 Xiao Ni <xni@redhat.com> - 4.1.11
- mdcheck start service can't start
- Resolves rhbz#1769823
* Fri Nov 15 2019 Xiao Ni <xni@redhat.com> - 4.1.10
- Update mdadm to latest upstream
- Resolves rhbz#1721937
* Wed Jul 10 2019 Xiao Ni <xni@redhat.com> - 4.1.9
- Add --incremental for ddf member disk in udev rule
- Resolves rhbz#1693583