import mdadm-4.1-13.el8

This commit is contained in:
CentOS Sources 2020-04-28 05:36:31 -04:00 committed by Andrew Lukoshko
parent df56316c8f
commit a5476d5870
42 changed files with 3713 additions and 3 deletions

View File

@ -0,0 +1,109 @@
From ae7d61e35ec2ab6361c3e509a8db00698ef3396f Mon Sep 17 00:00:00 2001
From: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Date: Tue, 7 May 2019 16:08:47 +0200
Subject: [RHEL7.8 PATCH V2 25/47] mdmon: fix wrong array state when disk fails
during mdmon startup
If a member drive disappears and is set faulty by the kernel during
mdmon startup, after ss->load_container() but before manage_new(), mdmon
will try to readd the faulty drive to the array and start rebuilding.
Metadata on the active drive is updated, but the faulty drive is not
removed from the array and is left in a "blocked" state and any write
request to the array will block. If the faulty drive reappears in the
system e.g. after a reboot, the array will not assemble because metadata
on the drives will be incompatible (at least on imsm).
Fix this by adding a new option for sysfs_read(): "GET_DEVS_ALL". This
is an extension for the "GET_DEVS" option and causes all member devices
to be returned, even if the associated block device has been removed.
Use this option in manage_new() to include the faulty device on the
active_array's devices list. Mdmon will then properly remove the faulty
device from the array and update the metadata to reflect the degraded
state.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
managemon.c | 2 +-
mdadm.h | 1 +
super-intel.c | 2 +-
sysfs.c | 23 ++++++++++++++---------
4 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/managemon.c b/managemon.c
index 29b91ba..200cf83 100644
--- a/managemon.c
+++ b/managemon.c
@@ -678,7 +678,7 @@ static void manage_new(struct mdstat_ent *mdstat,
mdi = sysfs_read(-1, mdstat->devnm,
GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
GET_SAFEMODE|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
- GET_LAYOUT);
+ GET_LAYOUT|GET_DEVS_ALL);
if (!mdi)
return;
diff --git a/mdadm.h b/mdadm.h
index 705bd9b..427cc52 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -647,6 +647,7 @@ enum sysfs_read_flags {
GET_ERROR = (1 << 24),
GET_ARRAY_STATE = (1 << 25),
GET_CONSISTENCY_POLICY = (1 << 26),
+ GET_DEVS_ALL = (1 << 27),
};
/* If fd >= 0, get the array it is open on,
diff --git a/super-intel.c b/super-intel.c
index 2ba045a..4fd5e84 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -8560,7 +8560,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
disk = get_imsm_disk(super, ord_to_idx(ord));
/* check for new failures */
- if (state & DS_FAULTY) {
+ if (disk && (state & DS_FAULTY)) {
if (mark_failure(super, dev, disk, ord_to_idx(ord)))
super->updates_pending++;
}
diff --git a/sysfs.c b/sysfs.c
index df6fdda..2dd9ab6 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -313,17 +313,22 @@ struct mdinfo *sysfs_read(int fd, char *devnm, unsigned long options)
/* assume this is a stale reference to a hot
* removed device
*/
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ free(dev);
+ continue;
+ }
+ } else {
+ sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
}
- sscanf(buf, "%d:%d", &dev->disk.major, &dev->disk.minor);
- /* special case check for block devices that can go 'offline' */
- strcpy(dbase, "block/device/state");
- if (load_sys(fname, buf, sizeof(buf)) == 0 &&
- strncmp(buf, "offline", 7) == 0) {
- free(dev);
- continue;
+ if (!(options & GET_DEVS_ALL)) {
+ /* special case check for block devices that can go 'offline' */
+ strcpy(dbase, "block/device/state");
+ if (load_sys(fname, buf, sizeof(buf)) == 0 &&
+ strncmp(buf, "offline", 7) == 0) {
+ free(dev);
+ continue;
+ }
}
/* finally add this disk to the array */
--
2.7.5

View File

@ -0,0 +1,212 @@
From 4ec389e3f0c1233f5aa2d5b4e63d96e33d2a37f0 Mon Sep 17 00:00:00 2001
From: Roman Sobanski <roman.sobanski@intel.com>
Date: Tue, 2 Jul 2019 13:29:27 +0200
Subject: [RHEL7.8 PATCH V2 26/47] Enable probe_roms to scan more than 6 roms.
In some cases, if more than 6 option ROMs (oroms) exist, the resource for
a particular controller may not be found. Change the method for storing
adapter_rom_resources from an array to a list.
Signed-off-by: Roman Sobanski <roman.sobanski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
probe_roms.c | 98 ++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 56 insertions(+), 42 deletions(-)
diff --git a/probe_roms.c b/probe_roms.c
index b0b0883..7ea04c7 100644
--- a/probe_roms.c
+++ b/probe_roms.c
@@ -35,6 +35,9 @@ static const int rom_len = 0xf0000 - 0xc0000; /* option-rom memory region */
static int _sigbus;
static unsigned long rom_align;
+static void roms_deinit(void);
+static int roms_init(void);
+
static void sigbus(int sig)
{
_sigbus = 1;
@@ -75,6 +78,7 @@ void probe_roms_exit(void)
munmap(rom_mem, rom_len);
rom_mem = MAP_FAILED;
}
+ roms_deinit();
}
int probe_roms_init(unsigned long align)
@@ -91,6 +95,9 @@ int probe_roms_init(unsigned long align)
else
return -1;
+ if (roms_init())
+ return -1;
+
if (signal(SIGBUS, sigbus) == SIG_ERR)
rc = -1;
if (rc == 0) {
@@ -131,6 +138,7 @@ struct resource {
unsigned long end;
unsigned long data;
const char *name;
+ struct resource *next;
};
static struct resource system_rom_resource = {
@@ -147,37 +155,7 @@ static struct resource extension_rom_resource = {
.end = 0xeffff,
};
-static struct resource adapter_rom_resources[] = { {
- .name = "Adapter ROM",
- .start = 0xc8000,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-}, {
- .name = "Adapter ROM",
- .start = 0,
- .data = 0,
- .end = 0,
-} };
+static struct resource *adapter_rom_resources;
static struct resource video_rom_resource = {
.name = "Video ROM",
@@ -186,8 +164,35 @@ static struct resource video_rom_resource = {
.end = 0xc7fff,
};
+static int roms_init(void)
+{
+ adapter_rom_resources = malloc(sizeof(struct resource));
+ if (adapter_rom_resources == NULL)
+ return 1;
+ adapter_rom_resources->name = "Adapter ROM";
+ adapter_rom_resources->start = 0xc8000;
+ adapter_rom_resources->data = 0;
+ adapter_rom_resources->end = 0;
+ adapter_rom_resources->next = NULL;
+ return 0;
+}
+
+static void roms_deinit(void)
+{
+ struct resource *res;
+
+ res = adapter_rom_resources;
+ while (res) {
+ struct resource *tmp = res;
+
+ res = res->next;
+ free(tmp);
+ }
+}
+
#define ROMSIGNATURE 0xaa55
+
static int romsignature(const unsigned char *rom)
{
const unsigned short * const ptr = (const unsigned short *)rom;
@@ -208,16 +213,14 @@ static int romchecksum(const unsigned char *rom, unsigned long length)
int scan_adapter_roms(scan_fn fn)
{
/* let scan_fn examing each of the adapter roms found by probe_roms */
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
int found;
if (rom_fd < 0)
return 0;
found = 0;
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) {
- struct resource *res = &adapter_rom_resources[i];
-
+ while (res) {
if (res->start) {
found = fn(isa_bus_to_virt(res->start),
isa_bus_to_virt(res->end),
@@ -226,6 +229,7 @@ int scan_adapter_roms(scan_fn fn)
break;
} else
break;
+ res = res->next;
}
return found;
@@ -241,14 +245,14 @@ void probe_roms(void)
const void *rom;
unsigned long start, length, upper;
unsigned char c;
- unsigned int i;
+ struct resource *res = adapter_rom_resources;
__u16 val=0;
if (rom_fd < 0)
return;
/* video rom */
- upper = adapter_rom_resources[0].start;
+ upper = res->start;
for (start = video_rom_resource.start; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
@@ -283,8 +287,9 @@ void probe_roms(void)
upper = extension_rom_resource.start;
}
+ struct resource *prev_res = res;
/* check for adapter roms on 2k boundaries */
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += rom_align) {
+ for (; start < upper; start += rom_align) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
@@ -308,10 +313,19 @@ void probe_roms(void)
if (!length || start + length > upper || !romchecksum(rom, length))
continue;
- adapter_rom_resources[i].start = start;
- adapter_rom_resources[i].data = start + (unsigned long) val;
- adapter_rom_resources[i].end = start + length - 1;
+ if (res == NULL) {
+ res = calloc(1, sizeof(struct resource));
+ if (res == NULL)
+ return;
+ prev_res->next = res;
+ }
+
+ res->start = start;
+ res->data = start + (unsigned long)val;
+ res->end = start + length - 1;
- start = adapter_rom_resources[i++].end & ~(rom_align - 1);
+ start = res->end & ~(rom_align - 1);
+ prev_res = res;
+ res = res->next;
}
}
--
2.7.5

View File

@ -0,0 +1,39 @@
From a4f7290c20c2ff78328c9db0b18029165cfb05b2 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 13:26:08 -0400
Subject: [RHEL7.8 PATCH V2 27/47] super-intel: Fix issue with abs() being
irrelevant
gcc9 complains about subtracting unsigned from unsigned and code
assuming the result can be negative.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 4fd5e84..230e164 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -2875,7 +2875,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
{
unsigned long long component_size;
unsigned long long dev_size = imsm_dev_size(dev);
- unsigned long long calc_dev_size = 0;
+ long long calc_dev_size = 0;
unsigned int member_disks = imsm_num_data_members(map);
if (member_disks == 0)
@@ -2889,7 +2889,7 @@ static unsigned long long calc_component_size(struct imsm_map *map,
* 2048 blocks per each device. If the difference is higher it means
* that array size was expanded and num_data_stripes was not updated.
*/
- if ((unsigned int)abs(calc_dev_size - dev_size) >
+ if (llabs(calc_dev_size - (long long)dev_size) >
(1 << SECT_PER_MB_SHIFT) * member_disks) {
component_size = dev_size / member_disks;
dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
--
2.7.5

View File

@ -0,0 +1,57 @@
From 7039d1f8200b9599b23db5953934fdb43b0442e0 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:15:38 -0400
Subject: [RHEL7.8 PATCH V2 28/47] mdadm.h: Introduced unaligned
{get,put}_unaligned{16,32}()
We need these to avoid gcc9 going all crazy on us.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/mdadm.h b/mdadm.h
index 427cc52..0fa9e1b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -192,6 +192,36 @@ struct dlm_lksb {
#endif /* __KLIBC__ */
/*
+ * Partially stolen from include/linux/unaligned/packed_struct.h
+ */
+struct __una_u16 { __u16 x; } __attribute__ ((packed));
+struct __una_u32 { __u32 x; } __attribute__ ((packed));
+
+static inline __u16 __get_unaligned16(const void *p)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+ return ptr->x;
+}
+
+static inline __u32 __get_unaligned32(const void *p)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+ return ptr->x;
+}
+
+static inline void __put_unaligned16(__u16 val, void *p)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned32(__u32 val, void *p)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *)p;
+ ptr->x = val;
+}
+
+/*
* Check at compile time that something is of a particular type.
* Always evaluates to 1 so you may use it easily in comparisons.
*/
--
2.7.5

View File

@ -0,0 +1,38 @@
From 486720e0c2418e7e2e0a16221f7c42a308622254 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jsorensen@fb.com>
Date: Tue, 9 Jul 2019 14:49:22 -0400
Subject: [RHEL7.8 PATCH V2 29/47] super-intel: Use put_unaligned in split_ull
Shut up some gcc9 errors by using put_unaligned() accessors. Not pretty,
but better than it was.
Also switch to the correct byte-swap macros.
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index 230e164..d7e8a65 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -1165,12 +1165,12 @@ static int count_memberships(struct dl *dl, struct intel_super *super)
static __u32 imsm_min_reserved_sectors(struct intel_super *super);
-static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
+static int split_ull(unsigned long long n, void *lo, void *hi)
{
if (lo == 0 || hi == 0)
return 1;
- *lo = __le32_to_cpu((unsigned)n);
- *hi = __le32_to_cpu((unsigned)(n >> 32));
+ __put_unaligned32(__cpu_to_le32((__u32)n), lo);
+ __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
return 0;
}
--
2.7.5

View File

@ -0,0 +1,345 @@
From b06815989179e0f153e44e4336290e655edce9a1 Mon Sep 17 00:00:00 2001
From: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Date: Wed, 10 Jul 2019 13:38:53 +0200
Subject: [RHEL7.8 PATCH V2 30/47] mdadm: load default sysfs attributes after
assemblation
Added a new type of line to mdadm.conf which allows specifying values of
sysfs attributes for MD devices that should be loaded after the array is
assembled. Each line is interpreted as list of structures containing
sysname of MD device (md126 etc.) and list of sysfs attributes and their
values.
Signed-off-by: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Assemble.c | 12 +++--
Incremental.c | 1 +
config.c | 7 ++-
mdadm.conf.5 | 25 ++++++++++
mdadm.h | 3 ++
sysfs.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 202 insertions(+), 4 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 420c7b3..b2e6914 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1063,9 +1063,12 @@ static int start_array(int mdfd,
mddev, okcnt + sparecnt + journalcnt,
okcnt + sparecnt + journalcnt == 1 ? "" : "s");
if (okcnt < (unsigned)content->array.raid_disks)
- fprintf(stderr, " (out of %d)",
+ fprintf(stderr, " (out of %d)\n",
content->array.raid_disks);
- fprintf(stderr, "\n");
+ else {
+ fprintf(stderr, "\n");
+ sysfs_rules_apply(mddev, content);
+ }
}
if (st->ss->validate_container) {
@@ -1139,6 +1142,7 @@ static int start_array(int mdfd,
rv = ioctl(mdfd, RUN_ARRAY, NULL);
reopen_mddev(mdfd); /* drop O_EXCL */
if (rv == 0) {
+ sysfs_rules_apply(mddev, content);
if (c->verbose >= 0) {
pr_err("%s has been started with %d drive%s",
mddev, okcnt, okcnt==1?"":"s");
@@ -2130,10 +2134,12 @@ int assemble_container_content(struct supertype *st, int mdfd,
pr_err("array %s now has %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
- else
+ else {
+ sysfs_rules_apply(chosen_name, content);
pr_err("Started %s with %d device%s",
chosen_name, working + preexist,
working + preexist == 1 ? "":"s");
+ }
if (preexist)
fprintf(stderr, " (%d new)", working);
if (expansion)
diff --git a/Incremental.c b/Incremental.c
index d4d3c35..98dbcd9 100644
--- a/Incremental.c
+++ b/Incremental.c
@@ -480,6 +480,7 @@ int Incremental(struct mddev_dev *devlist, struct context *c,
pr_err("container %s now has %d device%s\n",
chosen_name, info.array.working_disks,
info.array.working_disks == 1?"":"s");
+ sysfs_rules_apply(chosen_name, &info);
wait_for(chosen_name, mdfd);
if (st->ss->external)
strcpy(devnm, fd2devnm(mdfd));
diff --git a/config.c b/config.c
index e14eae0..7592b2d 100644
--- a/config.c
+++ b/config.c
@@ -80,7 +80,8 @@ char DefaultAltConfFile[] = CONFFILE2;
char DefaultAltConfDir[] = CONFFILE2 ".d";
enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev,
- Homehost, HomeCluster, AutoMode, Policy, PartPolicy, LTEnd };
+ Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs,
+ LTEnd };
char *keywords[] = {
[Devices] = "devices",
[Array] = "array",
@@ -93,6 +94,7 @@ char *keywords[] = {
[AutoMode] = "auto",
[Policy] = "policy",
[PartPolicy]="part-policy",
+ [Sysfs] = "sysfs",
[LTEnd] = NULL
};
@@ -764,6 +766,9 @@ void conf_file(FILE *f)
case PartPolicy:
policyline(line, rule_part);
break;
+ case Sysfs:
+ sysfsline(line);
+ break;
default:
pr_err("Unknown keyword %s\n", line);
}
diff --git a/mdadm.conf.5 b/mdadm.conf.5
index 47c962a..27dbab1 100644
--- a/mdadm.conf.5
+++ b/mdadm.conf.5
@@ -587,6 +587,26 @@ be based on the domain, but with
appended, when N is the partition number for the partition that was
found.
+.TP
+.B SYSFS
+The SYSFS line lists custom values of MD device's sysfs attributes which will be
+stored in sysfs after the array is assembled. Multiple lines are allowed and each
+line has to contain the uuid or the name of the device to which it relates.
+.RS 4
+.TP
+.B uuid=
+hexadecimal identifier of MD device. This has to match the uuid stored in the
+superblock.
+.TP
+.B name=
+name of the MD device as was given to
+.I mdadm
+when the array was created. It will be ignored if
+.B uuid
+is not empty.
+.TP
+.RS 7
+
.SH EXAMPLE
DEVICE /dev/sd[bcdjkl]1
.br
@@ -657,6 +677,11 @@ CREATE group=system mode=0640 auto=part\-8
HOMEHOST <system>
.br
AUTO +1.x homehost \-all
+.br
+SYSFS name=/dev/md/raid5 group_thread_cnt=4 sync_speed_max=1000000
+.br
+SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4
+sync_speed_max=1000000
.SH SEE ALSO
.BR mdadm (8),
diff --git a/mdadm.h b/mdadm.h
index 0fa9e1b..c36d7fd 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1322,6 +1322,9 @@ void domain_add(struct domainlist **domp, char *domain);
extern void policy_save_path(char *id_path, struct map_ent *array);
extern int policy_check_path(struct mdinfo *disk, struct map_ent *array);
+extern void sysfs_rules_apply(char *devnm, struct mdinfo *dev);
+extern void sysfsline(char *line);
+
#if __GNUC__ < 3
struct stat64;
#endif
diff --git a/sysfs.c b/sysfs.c
index 2dd9ab6..c313781 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -26,9 +26,22 @@
#include "mdadm.h"
#include <dirent.h>
#include <ctype.h>
+#include "dlink.h"
#define MAX_SYSFS_PATH_LEN 120
+struct dev_sysfs_rule {
+ struct dev_sysfs_rule *next;
+ char *devname;
+ int uuid[4];
+ int uuid_set;
+ struct sysfs_entry {
+ struct sysfs_entry *next;
+ char *name;
+ char *value;
+ } *entry;
+};
+
int load_sys(char *path, char *buf, int len)
{
int fd = open(path, O_RDONLY);
@@ -999,3 +1012,148 @@ int sysfs_wait(int fd, int *msec)
}
return n;
}
+
+int sysfs_rules_apply_check(const struct mdinfo *sra,
+ const struct sysfs_entry *ent)
+{
+ /* Check whether parameter is regular file,
+ * exists and is under specified directory.
+ */
+ char fname[MAX_SYSFS_PATH_LEN];
+ char dname[MAX_SYSFS_PATH_LEN];
+ char resolved_path[PATH_MAX];
+ char resolved_dir[PATH_MAX];
+
+ if (sra == NULL || ent == NULL)
+ return -1;
+
+ snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
+ snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+
+ if (realpath(fname, resolved_path) == NULL ||
+ realpath(dname, resolved_dir) == NULL)
+ return -1;
+
+ if (strncmp(resolved_dir, resolved_path,
+ strnlen(resolved_dir, PATH_MAX)) != 0)
+ return -1;
+
+ return 0;
+}
+
+static struct dev_sysfs_rule *sysfs_rules;
+
+void sysfs_rules_apply(char *devnm, struct mdinfo *dev)
+{
+ struct dev_sysfs_rule *rules = sysfs_rules;
+
+ while (rules) {
+ struct sysfs_entry *ent = rules->entry;
+ int match = 0;
+
+ if (!rules->uuid_set) {
+ if (rules->devname)
+ match = strcmp(devnm, rules->devname) == 0;
+ } else {
+ match = memcmp(dev->uuid, rules->uuid,
+ sizeof(int[4])) == 0;
+ }
+
+ while (match && ent) {
+ if (sysfs_rules_apply_check(dev, ent) < 0)
+ pr_err("SYSFS: failed to write '%s' to '%s'\n",
+ ent->value, ent->name);
+ else
+ sysfs_set_str(dev, NULL, ent->name, ent->value);
+ ent = ent->next;
+ }
+ rules = rules->next;
+ }
+}
+
+static void sysfs_rule_free(struct dev_sysfs_rule *rule)
+{
+ struct sysfs_entry *entry;
+
+ while (rule) {
+ struct dev_sysfs_rule *tmp = rule->next;
+
+ entry = rule->entry;
+ while (entry) {
+ struct sysfs_entry *tmp = entry->next;
+
+ free(entry->name);
+ free(entry->value);
+ free(entry);
+ entry = tmp;
+ }
+
+ if (rule->devname)
+ free(rule->devname);
+ free(rule);
+ rule = tmp;
+ }
+}
+
+void sysfsline(char *line)
+{
+ struct dev_sysfs_rule *sr;
+ char *w;
+
+ sr = xcalloc(1, sizeof(*sr));
+ for (w = dl_next(line); w != line ; w = dl_next(w)) {
+ if (strncasecmp(w, "name=", 5) == 0) {
+ char *devname = w + 5;
+
+ if (strncmp(devname, "/dev/md/", 8) == 0) {
+ if (sr->devname)
+ pr_err("Only give one device per SYSFS line: %s\n",
+ devname);
+ else
+ sr->devname = xstrdup(devname);
+ } else {
+ pr_err("%s is an invalid name for an md device - ignored.\n",
+ devname);
+ }
+ } else if (strncasecmp(w, "uuid=", 5) == 0) {
+ char *uuid = w + 5;
+
+ if (sr->uuid_set) {
+ pr_err("Only give one uuid per SYSFS line: %s\n",
+ uuid);
+ } else {
+ if (parse_uuid(w + 5, sr->uuid) &&
+ memcmp(sr->uuid, uuid_zero,
+ sizeof(int[4])) != 0)
+ sr->uuid_set = 1;
+ else
+ pr_err("Invalid uuid: %s\n", uuid);
+ }
+ } else {
+ struct sysfs_entry *prop;
+
+ char *sep = strchr(w, '=');
+
+ if (sep == NULL || *(sep + 1) == 0) {
+ pr_err("Cannot parse \"%s\" - ignoring.\n", w);
+ continue;
+ }
+
+ prop = xmalloc(sizeof(*prop));
+ prop->value = xstrdup(sep + 1);
+ *sep = 0;
+ prop->name = xstrdup(w);
+ prop->next = sr->entry;
+ sr->entry = prop;
+ }
+ }
+
+ if (!sr->devname && !sr->uuid_set) {
+ pr_err("Device name not found in sysfs config entry - ignoring.\n");
+ sysfs_rule_free(sr);
+ return;
+ }
+
+ sr->next = sysfs_rules;
+ sysfs_rules = sr;
+}
--
2.7.5

View File

@ -0,0 +1,34 @@
From 452dc4d13a012cdcb05088c0dbc699959c4d6c73 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 6 Aug 2019 16:05:23 +0300
Subject: [RHEL7.8 PATCH V2 31/47] mdadm.h: include sysmacros.h unconditionally
musl libc now also requires sys/sysmacros.h for the major/minor macros.
All supported libc implementations carry sys/sysmacros.h, including
diet-libc, klibc, and uclibc-ng.
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
mdadm.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/mdadm.h b/mdadm.h
index c36d7fd..d61a9ca 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -45,10 +45,8 @@ extern __off64_t lseek64 __P ((int __fd, __off64_t __offset, int __whence));
#include <errno.h>
#include <string.h>
#include <syslog.h>
-#ifdef __GLIBC__
/* Newer glibc requires sys/sysmacros.h directly for makedev() */
#include <sys/sysmacros.h>
-#endif
#ifdef __dietlibc__
#include <strings.h>
/* dietlibc has deprecated random and srandom!! */
--
2.7.5

View File

@ -0,0 +1,161 @@
From d11abe4bd5cad39803726ddff1888674e417bda5 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:29 +0800
Subject: [RHEL7.8 PATCH V2 32/47] mdadm: add --no-devices to avoid component
devices detail information
When people assemble a md raid device with a large number of
component devices (e.g. 1500 DASD disks), the raid device detail
information generated by 'mdadm --detail --export $devnode' is very
large. It is because the detail information contains information of
all the component disks (even the missing/failed ones).
In such condition, when udev-md-raid-arrays.rules is triggered and
internally calls "mdadm --detail --export $devnode", the user may
observe the systemd error message "invalid message length". It is
because the following on-stack raw message buffer in systemd code
is not big enough,
systemd/src/libudev/libudev-monitor.c
_public_ struct udev_device *udev_monito ...
struct ucred *cred;
union {
struct udev_monitor_netlink_header nlh;
char raw[8192];
} buf;
Even if the size of raw[] is changed from 8KB to a larger size, it may
still not be enough for the detail message of a md raid device with a
much larger number of component devices.
To fix this problem, an extra option '--no-devices' is added (the
original idea is proposed by Neil Brown). When printing detailed
information of a md raid device, if '--no-devices' is specified, then
all component devices information will not be printed, so the output
message size can be restricted to a small number. Even though systemd
only has an 8KB on-stack raw buffer, the md raid array udev rules can
work correctly without a failure message.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 24 ++++++++++++++++--------
ReadMe.c | 1 +
mdadm.c | 4 ++++
mdadm.h | 2 ++
4 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/Detail.c b/Detail.c
index 20ea03a..ad60434 100644
--- a/Detail.c
+++ b/Detail.c
@@ -56,7 +56,7 @@ int Detail(char *dev, struct context *c)
*/
int fd = open(dev, O_RDONLY);
mdu_array_info_t array;
- mdu_disk_info_t *disks;
+ mdu_disk_info_t *disks = NULL;
int next;
int d;
time_t atime;
@@ -280,7 +280,7 @@ int Detail(char *dev, struct context *c)
}
map_free(map);
}
- if (sra) {
+ if (!c->no_devices && sra) {
struct mdinfo *mdi;
for (mdi = sra->devs; mdi; mdi = mdi->next) {
char *path;
@@ -655,12 +655,17 @@ This is pretty boring
printf("\n\n");
}
- if (array.raid_disks)
- printf(" Number Major Minor RaidDevice State\n");
- else
- printf(" Number Major Minor RaidDevice\n");
+ if (!c->no_devices) {
+ if (array.raid_disks)
+ printf(" Number Major Minor RaidDevice State\n");
+ else
+ printf(" Number Major Minor RaidDevice\n");
+ }
}
- free(info);
+
+ /* if --no_devices specified, not print component devices info */
+ if (c->no_devices)
+ goto skip_devices_state;
for (d = 0; d < max_disks * 2; d++) {
char *dv;
@@ -747,6 +752,8 @@ This is pretty boring
if (!c->brief)
printf("\n");
}
+
+skip_devices_state:
if (spares && c->brief && array.raid_disks)
printf(" spares=%d", spares);
if (c->brief && st && st->sb)
@@ -766,8 +773,9 @@ This is pretty boring
!enough(array.level, array.raid_disks, array.layout, 1, avail))
rv = 2;
- free(disks);
out:
+ free(info);
+ free(disks);
close(fd);
free(subarray);
free(avail);
diff --git a/ReadMe.c b/ReadMe.c
index 12ccf83..eaf1042 100644
--- a/ReadMe.c
+++ b/ReadMe.c
@@ -181,6 +181,7 @@ struct option long_options[] = {
/* For Detail/Examine */
{"brief", 0, 0, Brief},
+ {"no-devices",0, 0, NoDevices},
{"export", 0, 0, 'Y'},
{"sparc2.2", 0, 0, Sparc22},
{"test", 0, 0, 't'},
diff --git a/mdadm.c b/mdadm.c
index 25a1abd..1fb8086 100644
--- a/mdadm.c
+++ b/mdadm.c
@@ -159,6 +159,10 @@ int main(int argc, char *argv[])
c.brief = 1;
continue;
+ case NoDevices:
+ c.no_devices = 1;
+ continue;
+
case 'Y': c.export++;
continue;
diff --git a/mdadm.h b/mdadm.h
index d61a9ca..43b07d5 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -440,6 +440,7 @@ enum special_options {
NoSharing,
HelpOptions,
Brief,
+ NoDevices,
ManageOpt,
Add,
AddSpare,
@@ -550,6 +551,7 @@ struct context {
int runstop;
int verbose;
int brief;
+ int no_devices;
int force;
char *homehost;
int require_homehost;
--
2.7.5

View File

@ -0,0 +1,42 @@
From 1a52f1fc0266d438c996789d4addbfac999a6139 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Wed, 31 Jul 2019 13:29:30 +0800
Subject: [RHEL7.8 PATCH V2 33/47] udev: add --no-devices option for calling
'mdadm --detail'
When creating the symlink of a md raid device, the detailed information of
component disks is unnecessary for rule udev-md-raid-arrays.rules. For
md raid devices with huge number of component disks (e.g. 1500 DASD
disks), the detail information of component devices can be very large
and exceed udev monitor's on-stack message buffer.
This patch adds '--no-devices' option when calling mdadm by,
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
Now the detailed output won't include component disks information,
and the error message "invalid message length" reported by systemd can
be removed.
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index 5b99d58..d391665 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -17,7 +17,7 @@ TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
-IMPORT{program}="BINDIR/mdadm --detail --export $devnode"
+IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
ENV{DEVTYPE}=="disk", ENV{MD_NAME}=="?*", SYMLINK+="disk/by-id/md-name-$env{MD_NAME}", OPTIONS+="string_escape=replace"
ENV{DEVTYPE}=="disk", ENV{MD_UUID}=="?*", SYMLINK+="disk/by-id/md-uuid-$env{MD_UUID}"
ENV{DEVTYPE}=="disk", ENV{MD_DEVNAME}=="?*", SYMLINK+="md/$env{MD_DEVNAME}"
--
2.7.5

View File

@ -0,0 +1,44 @@
From 91c97c5432028875db5f8abeddb5cb5f31902001 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Mon, 15 Jul 2019 09:25:35 +0200
Subject: [RHEL7.8 PATCH V2 34/47] imsm: close removed drive fd.
When member drive fails, managemon prepares metadata update and adds
the drive to disk_mgmt_list with DISK_REMOVE flag. It fills only
minor and major. It is enough to recognize the device later.
Monitor thread while processing this update will remove the drive from
super only if it is a spare. It never removes failed member from
disks list. As a result, it still keeps opened descriptor to
non-existing device.
If the removed drive is not a spare, fill fd in the disk_cfg structure
(prepared by managemon); monitor will close the fd when freeing it.
Also set this drive fd to -1 in super to avoid double closing because
monitor will close the fd (if needed) while replacing removed drive
in array.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index d7e8a65..a103a3f 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -9200,6 +9200,9 @@ static int add_remove_disk_update(struct intel_super *super)
remove_disk_super(super,
disk_cfg->major,
disk_cfg->minor);
+ } else {
+ disk_cfg->fd = disk->fd;
+ disk->fd = -1;
}
}
/* release allocate disk structure */
--
2.7.5

View File

@ -0,0 +1,46 @@
From fd5b09c9a9107f0393ce194c4aac6e7b8f163e85 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 16 Aug 2019 11:06:17 +0200
Subject: [RHEL7.8 PATCH V2 35/47] mdadm: check value returned by snprintf
against errors
GCC 8 checks possible truncation during snprintf more strictly
than GCC 7, which results in compilation errors. To fix this
problem, checking the result of snprintf against errors has been added.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
sysfs.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/sysfs.c b/sysfs.c
index c313781..2995713 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -1023,12 +1023,20 @@ int sysfs_rules_apply_check(const struct mdinfo *sra,
char dname[MAX_SYSFS_PATH_LEN];
char resolved_path[PATH_MAX];
char resolved_dir[PATH_MAX];
+ int result;
if (sra == NULL || ent == NULL)
return -1;
- snprintf(dname, MAX_SYSFS_PATH_LEN, "/sys/block/%s/md/", sra->sys_name);
- snprintf(fname, MAX_SYSFS_PATH_LEN, "%s/%s", dname, ent->name);
+ result = snprintf(dname, MAX_SYSFS_PATH_LEN,
+ "/sys/block/%s/md/", sra->sys_name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
+
+ result = snprintf(fname, MAX_SYSFS_PATH_LEN,
+ "%s/%s", dname, ent->name);
+ if (result < 0 || result >= MAX_SYSFS_PATH_LEN)
+ return -1;
if (realpath(fname, resolved_path) == NULL ||
realpath(dname, resolved_dir) == NULL)
--
2.7.5

View File

@ -0,0 +1,163 @@
From 43ebc9105e9dafe5145b3e801c05da4736bf6e02 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@canonical.com>
Date: Tue, 3 Sep 2019 16:49:01 -0300
Subject: [RHEL7.8 PATCH V2 36/47] mdadm: Introduce new array state 'broken'
for raid0/linear
Currently if a md raid0/linear array gets one or more members removed while
being mounted, kernel keeps showing state 'clean' in the 'array_state'
sysfs attribute. Despite udev signaling the member device is gone, 'mdadm'
cannot issue the STOP_ARRAY ioctl successfully, given the array is mounted.
Nothing else hints that something is wrong (except that the removed devices
don't show properly in the output of mdadm 'detail' command). There is no
other property to be checked, and if user is not performing reads/writes
to the array, even kernel log is quiet and doesn't give a clue about the
missing member.
This patch is the mdadm counterpart of kernel new array state 'broken'.
The 'broken' state mimics the state 'clean' in every aspect, being useful
only to distinguish if an array has some member missing. All necessary
paths in mdadm were changed to deal with 'broken' state, and in case the
tool runs in a kernel that is not updated, it'll work normally, i.e., it
doesn't require the 'broken' state in order to work.
Also, this patch changes the way the array state is shown in the 'detail'
command (for raid0/linear only) - now it takes the 'array_state' sysfs
attribute into account instead of relying only on the MD_SB_CLEAN flag.
Cc: Jes Sorensen <jes.sorensen@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@canonical.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
Detail.c | 14 ++++++++++++--
Monitor.c | 8 ++++++--
maps.c | 1 +
mdadm.h | 1 +
mdmon.h | 2 +-
monitor.c | 4 ++--
6 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/Detail.c b/Detail.c
index ad60434..3e61e37 100644
--- a/Detail.c
+++ b/Detail.c
@@ -81,6 +81,7 @@ int Detail(char *dev, struct context *c)
int external;
int inactive;
int is_container = 0;
+ char *arrayst;
if (fd < 0) {
pr_err("cannot open %s: %s\n",
@@ -485,9 +486,18 @@ int Detail(char *dev, struct context *c)
else
st = ", degraded";
+ if (array.state & (1 << MD_SB_CLEAN)) {
+ if ((array.level == 0) ||
+ (array.level == LEVEL_LINEAR))
+ arrayst = map_num(sysfs_array_states,
+ sra->array_state);
+ else
+ arrayst = "clean";
+ } else
+ arrayst = "active";
+
printf(" State : %s%s%s%s%s%s \n",
- (array.state & (1 << MD_SB_CLEAN)) ?
- "clean" : "active", st,
+ arrayst, st,
(!e || (e->percent < 0 &&
e->percent != RESYNC_PENDING &&
e->percent != RESYNC_DELAYED)) ?
diff --git a/Monitor.c b/Monitor.c
index 036103f..b527165 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -1055,8 +1055,11 @@ int Wait(char *dev)
}
}
+/* The state "broken" is used only for RAID0/LINEAR - it's the same as
+ * "clean", but used in case the array has one or more members missing.
+ */
static char *clean_states[] = {
- "clear", "inactive", "readonly", "read-auto", "clean", NULL };
+ "clear", "inactive", "readonly", "read-auto", "clean", "broken", NULL };
int WaitClean(char *dev, int verbose)
{
@@ -1116,7 +1119,8 @@ int WaitClean(char *dev, int verbose)
rv = read(state_fd, buf, sizeof(buf));
if (rv < 0)
break;
- if (sysfs_match_word(buf, clean_states) <= 4)
+ if (sysfs_match_word(buf, clean_states) <
+ (int)ARRAY_SIZE(clean_states) - 1)
break;
rv = sysfs_wait(state_fd, &delay);
if (rv < 0 && errno != EINTR)
diff --git a/maps.c b/maps.c
index 02a0474..49b7f2c 100644
--- a/maps.c
+++ b/maps.c
@@ -150,6 +150,7 @@ mapping_t sysfs_array_states[] = {
{ "read-auto", ARRAY_READ_AUTO },
{ "clean", ARRAY_CLEAN },
{ "write-pending", ARRAY_WRITE_PENDING },
+ { "broken", ARRAY_BROKEN },
{ NULL, ARRAY_UNKNOWN_STATE }
};
diff --git a/mdadm.h b/mdadm.h
index 43b07d5..c88ceab 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -373,6 +373,7 @@ struct mdinfo {
ARRAY_ACTIVE,
ARRAY_WRITE_PENDING,
ARRAY_ACTIVE_IDLE,
+ ARRAY_BROKEN,
ARRAY_UNKNOWN_STATE,
} array_state;
struct md_bb bb;
diff --git a/mdmon.h b/mdmon.h
index 818367c..b3d72ac 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -21,7 +21,7 @@
extern const char Name[];
enum array_state { clear, inactive, suspended, readonly, read_auto,
- clean, active, write_pending, active_idle, bad_word};
+ clean, active, write_pending, active_idle, broken, bad_word};
enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };
diff --git a/monitor.c b/monitor.c
index 81537ed..e0d3be6 100644
--- a/monitor.c
+++ b/monitor.c
@@ -26,7 +26,7 @@
static char *array_states[] = {
"clear", "inactive", "suspended", "readonly", "read-auto",
- "clean", "active", "write-pending", "active-idle", NULL };
+ "clean", "active", "write-pending", "active-idle", "broken", NULL };
static char *sync_actions[] = {
"idle", "reshape", "resync", "recover", "check", "repair", NULL
};
@@ -476,7 +476,7 @@ static int read_and_act(struct active_array *a, fd_set *fds)
a->next_state = clean;
ret |= ARRAY_DIRTY;
}
- if (a->curr_state == clean) {
+ if ((a->curr_state == clean) || (a->curr_state == broken)) {
a->container->ss->set_array_state(a, 1);
}
if (a->curr_state == active ||
--
2.7.5

View File

@ -0,0 +1,40 @@
From 2c2d9c48d2daf0d78d20494c3779c0f6dc4bfa75 Mon Sep 17 00:00:00 2001
From: Nigel Croxon <ncroxon@redhat.com>
Date: Tue, 24 Sep 2019 11:39:24 -0400
Subject: [RHEL7.8 PATCH V2 37/47] mdadm: force a uuid swap on big endian
The code path for metadata 0.90 calls a common routine
fname_from_uuid that uses metadata 1.2. The code expects member
swapuuid to be set up and usable, but it is only set up when using
metadata 1.2. Since metadata 0.90 does not create and set swapuuid,
the test (st->ss == &super1) ? 1 : st->ss->swapuuid
fails. The swapuuid is set at compile time based on byte order.
For any call based on metadata 0.90 on big endian processors,
the --export uuid will be incorrect.
Signed-Off-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
util.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/util.c b/util.c
index c26cf5f..64dd409 100644
--- a/util.c
+++ b/util.c
@@ -685,8 +685,12 @@ char *fname_from_uuid(struct supertype *st, struct mdinfo *info,
// work, but can't have it set if we want this printout to match
// all the other uuid printouts in super1.c, so we force swapuuid
// to 1 to make our printout match the rest of super1
+#if __BYTE_ORDER == BIG_ENDIAN
+ return __fname_from_uuid(info->uuid, 1, buf, sep);
+#else
return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 :
st->ss->swapuuid, buf, sep);
+#endif
}
int check_ext2(int fd, char *name)
--
2.7.5

View File

@ -0,0 +1,99 @@
From e53cb968691d9e40d83caf5570da3bb7b83c64e1 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 31 May 2019 10:10:00 +0800
Subject: [RHEL7.8 PATCH V2 38/47] mdadm/md.4: add the descriptions for bitmap
sysfs nodes
The sysfs nodes under bitmap are not recorded in md.4,
add them based on md.rst and kernel source code.
Cc: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
md.4 | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/md.4 b/md.4
index 3a1d677..e86707a 100644
--- a/md.4
+++ b/md.4
@@ -1101,6 +1101,75 @@ stripe that requires some "prereading". For fairness this defaults to
maximizes sequential-write throughput at the cost of fairness to threads
doing small or random writes.
+.TP
+.B md/bitmap/backlog
+The value stored in the file only has any effect on RAID1 when write-mostly
+devices are active, and write requests to those devices are processed in the
+background.
+
+This variable sets a limit on the number of concurrent background writes,
+the valid values are 0 to 16383, 0 means that write-behind is not allowed,
+while any other number means it can happen. If there are more write requests
+than the number, new writes will be synchronous.
+
+.TP
+.B md/bitmap/can_clear
+This is for externally managed bitmaps, where the kernel writes the bitmap
+itself, but metadata describing the bitmap is managed by mdmon or similar.
+
+When the array is degraded, bits mustn't be cleared. When the array becomes
+optimal again, bits can be cleared, but first the metadata needs to record
+the current event count. So md sets this to 'false' and notifies mdmon,
+then mdmon updates the metadata and writes 'true'.
+
+There is no code in mdmon to actually do this, so maybe it doesn't even
+work.
+
+.TP
+.B md/bitmap/chunksize
+The bitmap chunksize can only be changed when no bitmap is active, and
+the value should be power of 2 and at least 512.
+
+.TP
+.B md/bitmap/location
+This indicates where the write-intent bitmap for the array is stored.
+It can be "none" or "file" or a signed offset from the array metadata
+- measured in sectors. You cannot set a file by writing here - that can
+only be done with the SET_BITMAP_FILE ioctl.
+
+Writing 'none' to 'bitmap/location' will clear the bitmap, and the previous
+location value must be written to it to restore the bitmap.
+
+.TP
+.B md/bitmap/max_backlog_used
+This keeps track of the maximum number of concurrent write-behind requests
+for an md array, writing any value to this file will clear it.
+
+.TP
+.B md/bitmap/metadata
+This can be 'internal' or 'clustered' or 'external'. 'internal' is set
+by default, which means the metadata for bitmap is stored in the first 256
+bytes of the bitmap space. 'clustered' means separate bitmap metadata are
+used for each cluster node. 'external' means that bitmap metadata is managed
+externally to the kernel.
+
+.TP
+.B md/bitmap/space
+This shows the space (in sectors) which is available at md/bitmap/location,
+and allows the kernel to know when it is safe to resize the bitmap to match
+a resized array. It should be big enough to contain the total bytes in the bitmap.
+
+For 1.0 metadata, assume we can use up to the superblock if before, else
+to 4K beyond superblock. For other metadata versions, assume no change is
+possible.
+
+.TP
+.B md/bitmap/time_base
+This shows the time (in seconds) between disk flushes, and is used when looking
+for bits in the bitmap to be cleared.
+
+The default value is 5 seconds, and it should be an unsigned long value.
+
.SS KERNEL PARAMETERS
The md driver recognised several different kernel parameters.
--
2.7.5

View File

@ -0,0 +1,35 @@
From 8063fd0f9e8abd718bd65928c19bc607cee5acd8 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:47:59 +0800
Subject: [RHEL7.8 PATCH V2 39/47] Init devlist as an array
devlist is a string. It will change to an array if there is a disk that
is an sbd disk. If one device is sbd, it runs devlist=().
This line of code changes devlist from a string to an array. If there is
no sbd device, this line of code is not run, so devlist is still a string.
The later code needs an array rather than a string, so init devlist
as an array to fix this problem.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/func.sh | 3 +++
1 file changed, 3 insertions(+)
diff --git a/clustermd_tests/func.sh b/clustermd_tests/func.sh
index 642cc96..801d604 100644
--- a/clustermd_tests/func.sh
+++ b/clustermd_tests/func.sh
@@ -39,6 +39,9 @@ fetch_devlist()
devlist=($(ls /dev/disk/by-path/*$ISCSI_ID*))
fi
# sbd disk cannot use in testing
+ # Init devlist as an array
+ i=''
+ devlist=(${devlist[@]#$i})
for i in ${devlist[@]}
do
sbd -d $i dump &> /dev/null
--
2.7.5

View File

@ -0,0 +1,31 @@
From 611093148574164fcf4f24f8c076d09473f655d7 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Mon, 30 Sep 2019 19:48:00 +0800
Subject: [RHEL7.8 PATCH V2 40/47] Don't need to check recovery after re-add
when no I/O writes to raid
If there is no write I/O between removing a member disk and re-adding it, there is no
recovery after re-adding the member disk.
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
clustermd_tests/02r1_Manage_re-add | 2 --
1 file changed, 2 deletions(-)
diff --git a/clustermd_tests/02r1_Manage_re-add b/clustermd_tests/02r1_Manage_re-add
index dd9c416..d0d13e5 100644
--- a/clustermd_tests/02r1_Manage_re-add
+++ b/clustermd_tests/02r1_Manage_re-add
@@ -9,8 +9,6 @@ check all state UU
check all dmesg
mdadm --manage $md0 --fail $dev0 --remove $dev0
mdadm --manage $md0 --re-add $dev0
-check $NODE1 recovery
-check all wait
check all state UU
check all dmesg
stop_md all $md0
--
2.7.5

View File

@ -0,0 +1,47 @@
From 7bd59e7926c6921121087eb067befaa896c900a4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 18 Sep 2019 15:12:55 +1000
Subject: [RHEL7.8 PATCH V2 41/47] udev: allow for udev attribute reading bug.
There is a bug in udev (which will hopefully get fixed, but
we should allow for it anyway).
When reading a sysfs attribute, it first reads the whole
value of the attribute, then reads again expecting to get
a read of 0 bytes, like you would with an ordinary file.
If the sysfs attribute changed between these two reads, it can
get a mixture of two values.
In particular, if it reads when 'array_state' is changing from
'clear' to 'inactive', it can find the value as "clear\nve".
This causes the test for "|clear|active" to fail, so systemd is allowed
to think that the array is ready - when it isn't.
So change the pattern to allow for this by adding a wildcard at
the end.
Also don't allow for an empty string - reading array_state will
never return an empty string - if it exists at all, it will be
non-empty.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
udev-md-raid-arrays.rules | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules
index d391665..c8fa8e8 100644
--- a/udev-md-raid-arrays.rules
+++ b/udev-md-raid-arrays.rules
@@ -14,7 +14,7 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
# never leave state 'inactive'
ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
-ATTR{md/array_state}=="|clear|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
+ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
LABEL="md_ignore_state"
IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
--
2.7.5

View File

@ -0,0 +1,40 @@
From b6180160f78f0182b296bdceed6419b26a6fccc7 Mon Sep 17 00:00:00 2001
From: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Date: Fri, 4 Oct 2019 12:07:28 +0200
Subject: [RHEL7.8 PATCH V2 42/47] imsm: save current_vol number
The imsm container_content routine will set curr_volume index in super
for getting volume information. This flag has never been restored to
original value, later other function may rely on it.
Restore this flag to original value.
Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/super-intel.c b/super-intel.c
index a103a3f..e02bbd7 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7826,6 +7826,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
int sb_errors = 0;
struct dl *d;
int spare_disks = 0;
+ int current_vol = super->current_vol;
/* do not assemble arrays when not all attributes are supported */
if (imsm_check_attributes(mpb->attributes) == 0) {
@@ -7993,6 +7994,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st, char *subarra
rest = this;
}
+ super->current_vol = current_vol;
return rest;
}
--
2.7.5

View File

@ -0,0 +1,50 @@
From 1a1ced1e2e64a6b4b349a3fb559f6b39e4cf7103 Mon Sep 17 00:00:00 2001
From: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Date: Fri, 8 Nov 2019 11:59:11 +0100
Subject: [RHEL7.8 PATCH V2 43/47] imsm: allow to specify second volume size
Removed checks which limited second volume size only to max value (the
largest size that fits on all current drives). It is now permitted
to create a second volume with a size lower than the maximum possible.
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
super-intel.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/super-intel.c b/super-intel.c
index e02bbd7..713058c 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -7298,11 +7298,8 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level,
maxsize = merge_extents(super, i);
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
if (maxsize < size || maxsize == 0) {
if (verbose) {
@@ -7393,11 +7390,8 @@ static int imsm_get_free_size(struct supertype *st, int raiddisks,
}
maxsize = size;
}
- if (!check_env("IMSM_NO_PLATFORM") &&
- mpb->num_raid_devs > 0 && size && size != maxsize) {
- pr_err("attempting to create a second volume with size less then remaining space. Aborting...\n");
- return 0;
- }
+ if (mpb->num_raid_devs > 0 && size && size != maxsize)
+ pr_err("attempting to create a second volume with size less then remaining space.\n");
cnt = 0;
for (dl = super->disks; dl; dl = dl->next)
if (dl->e)
--
2.7.5

View File

@ -0,0 +1,45 @@
From 6636788aaf4ec0cacaefb6e77592e4a68e70a957 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 44/47] mdcheck: when mdcheck_start is enabled,
enable mdcheck_continue too.
mdcheck_continue continues a regular array scan that was started by
mdcheck_start.
mdcheck_start will ensure that mdcheck_continue is active.
However, if you reboot after a check has started, but before it finishes,
then mdcheck_continue won't cause it to continue, because nothing
starts it on boot.
So add an install option for mdcheck_continue, and make sure it
gets enabled when mdcheck_start is enabled.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---
systemd/mdcheck_continue.timer | 2 ++
systemd/mdcheck_start.timer | 1 +
2 files changed, 3 insertions(+)
diff --git a/systemd/mdcheck_continue.timer b/systemd/mdcheck_continue.timer
index 3ccfd78..dba1074 100644
--- a/systemd/mdcheck_continue.timer
+++ b/systemd/mdcheck_continue.timer
@@ -11,3 +11,5 @@ Description=MD array scrubbing - continuation
[Timer]
OnCalendar= 1:05:00
+[Install]
+WantedBy= mdmonitor.service
diff --git a/systemd/mdcheck_start.timer b/systemd/mdcheck_start.timer
index 6480736..9e7e02a 100644
--- a/systemd/mdcheck_start.timer
+++ b/systemd/mdcheck_start.timer
@@ -13,3 +13,4 @@ OnCalendar=Sun *-*-1..7 1:00:00
[Install]
WantedBy= mdmonitor.service
+Also= mdcheck_continue.timer
--
2.7.5

View File

@ -0,0 +1,51 @@
From 4ca799c581703d4d0ad840833c037c2fff088ca7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 30 Oct 2019 10:32:41 +1100
Subject: [RHEL7.8 PATCH V2 45/47] mdcheck: use ${} to pass variable to mdcheck
$MDADM_CHECK_DURATION allows the value to be split on spaces.
${MDADM_CHECK_DURATION} avoids such splitting.
Making this change removes the need for double quoting when setting
the default Environment, and means that double quoting isn't needed
in the EnvironmentFile.
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
---