diff --git a/0169-Mdmonitor-Improve-udev-event-handling.patch b/0169-Mdmonitor-Improve-udev-event-handling.patch new file mode 100644 index 0000000..205500c --- /dev/null +++ b/0169-Mdmonitor-Improve-udev-event-handling.patch @@ -0,0 +1,564 @@ +From 9935cf0f64f3f1e70e7840385e9838c30487dc64 Mon Sep 17 00:00:00 2001 +From: Mateusz Grzonka +Date: Tue, 21 Nov 2023 01:58:23 +0100 +Subject: [PATCH 1/2] Mdmonitor: Improve udev event handling + +Mdmonitor is waiting for udev queue to become empty. +Even if the queue becomes empty, udev might still be processing last event. +However we want to wait and wake up mdmonitor when udev finished +processing events.. + +Also, the udev queue interface is considered legacy and should not be +used outside of udev. + +Use udev monitor instead, and wake up mdmonitor on every event triggered +by udev for md block device. + +We need to generate more change events from kernel, because they are +missing in some situations, for example, when rebuild started. +This will be addressed in a separate patch. + +Move udev specific code into separate functions, and place them in udev.c file. +Also move use_udev() logic from lib.c into newly created file. 
+ +Signed-off-by: Mateusz Grzonka +Signed-off-by: Kinga Tanska +Signed-off-by: Jes Sorensen +--- + Makefile | 18 +++---- + Manage.c | 3 +- + Monitor.c | 137 ++++++++++++++++++------------------------------- + lib.c | 13 ----- + mdadm.h | 1 + + mdopen.c | 7 +-- + udev.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + udev.h | 37 ++++++++++++++ + 8 files changed, 253 insertions(+), 113 deletions(-) + create mode 100644 udev.c + create mode 100644 udev.h + +diff --git a/Makefile b/Makefile +index b3aa36f6..cbdba49a 100644 +--- a/Makefile ++++ b/Makefile +@@ -163,14 +163,14 @@ else + ECHO=: + endif + +-OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o uuid.o util.o maps.o lib.o \ +- Manage.o Assemble.o Build.o \ +- Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ +- Incremental.o Dump.o \ +- mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \ +- super-mbr.o super-gpt.o \ +- restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \ +- platform-intel.o probe_roms.o crc32c.o ++OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o uuid.o util.o maps.o lib.o udev.o \ ++ Manage.o Assemble.o Build.o \ ++ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ ++ Incremental.o Dump.o \ ++ mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \ ++ super-mbr.o super-gpt.o \ ++ restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \ ++ platform-intel.o probe_roms.o crc32c.o + + CHECK_OBJS = restripe.o uuid.o sysfs.o maps.o lib.o xmalloc.o dlink.o + +@@ -179,7 +179,7 @@ SRCS = $(patsubst %.o,%.c,$(OBJS)) + INCL = mdadm.h part.h bitmap.h + + MON_OBJS = mdmon.o monitor.o managemon.o uuid.o util.o maps.o mdstat.o sysfs.o config.o mapfile.o mdopen.o\ +- policy.o lib.o \ ++ policy.o lib.o udev.o \ + Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \ + super-mbr.o super-gpt.o \ + super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \ +diff --git a/Manage.c b/Manage.c +index 075dd720..f0d4cb01 
100644 +--- a/Manage.c ++++ b/Manage.c +@@ -25,6 +25,7 @@ + #include "mdadm.h" + #include "md_u.h" + #include "md_p.h" ++#include "udev.h" + #include + + int Manage_ro(char *devname, int fd, int readonly) +@@ -462,7 +463,7 @@ done: + goto out; + } + +- if (devnm[0] && use_udev()) { ++ if (devnm[0] && udev_is_available()) { + struct map_ent *mp = map_by_devnm(&map, devnm); + remove_devices(devnm, mp ? mp->path : NULL); + } +diff --git a/Monitor.c b/Monitor.c +index e74a0558..9a1f2514 100644 +--- a/Monitor.c ++++ b/Monitor.c +@@ -23,18 +23,17 @@ + */ + + #include "mdadm.h" ++#include "udev.h" + #include "md_p.h" + #include "md_u.h" + #include + #include + #include +-#ifndef NO_LIBUDEV +-#include +-#endif + + #define TASK_COMM_LEN 16 + #define EVENT_NAME_MAX 32 + #define AUTOREBUILD_PID_PATH MDMON_DIR "/autorebuild.pid" ++#define FALLBACK_DELAY 5 + + /** + * struct state - external array or container properties. +@@ -126,12 +125,11 @@ static void link_containers_with_subarrays(struct state *list); + static void free_statelist(struct state *statelist); + static int check_array(struct state *st, struct mdstat_ent *mdstat, int increments, char *prefer); + static int check_one_sharer(int scan); +-#ifndef NO_LIBUDEV +-static int check_udev_activity(void); +-#endif + static void link_containers_with_subarrays(struct state *list); + static int make_daemon(char *pidfile); + static void try_spare_migration(struct state *statelist); ++static void wait_for_events(int *delay_for_event, int c_delay); ++static void wait_for_events_mdstat(int *delay_for_event, int c_delay); + static int write_autorebuild_pid(void); + + int Monitor(struct mddev_dev *devlist, +@@ -326,32 +324,12 @@ int Monitor(struct mddev_dev *devlist, + if (!new_found) { + if (oneshot) + break; +- else if (!anyredundant) { ++ if (!anyredundant) { + pr_err("No array with redundancy detected, stopping\n"); + break; + } +- else { +-#ifndef NO_LIBUDEV +- /* +- * Wait for udevd to finish new devices +- * processing. 
+- */ +- if (mdstat_wait(delay_for_event) && +- check_udev_activity()) +- pr_err("Error while waiting for UDEV to complete new devices processing\n"); +-#else +- int wait_result = mdstat_wait(delay_for_event); +- /* +- * Give chance to process new device +- */ +- if (wait_result != 0) { +- if (c->delay > 5) +- delay_for_event = 5; +- } else +- delay_for_event = c->delay; +-#endif +- mdstat_close(); +- } ++ ++ wait_for_events(&delay_for_event, c->delay); + } + info.test = 0; + +@@ -374,6 +352,49 @@ int Monitor(struct mddev_dev *devlist, + return 0; + } + ++/* ++ * wait_for_events() - Waits for events on md devices. ++ * @delay_for_event: pointer to current event delay ++ * @c_delay: delay from config ++ */ ++static void wait_for_events(int *delay_for_event, int c_delay) ++{ ++#ifndef NO_LIBUDEV ++ if (udev_is_available()) { ++ if (udev_wait_for_events(*delay_for_event) == UDEV_STATUS_ERROR) ++ pr_err("Error while waiting for udev events.\n"); ++ return; ++ } ++#endif ++ wait_for_events_mdstat(delay_for_event, c_delay); ++} ++ ++/* ++ * wait_for_events_mdstat() - Waits for events on mdstat. ++ * @delay_for_event: pointer to current event delay ++ * @c_delay: delay from config ++ */ ++static void wait_for_events_mdstat(int *delay_for_event, int c_delay) ++{ ++ int wait_result = mdstat_wait(*delay_for_event); ++ ++ if (wait_result < 0) { ++ pr_err("Error while waiting for events on mdstat.\n"); ++ return; ++ } ++ ++ /* ++ * Give chance to process new device ++ */ ++ if (wait_result != 0) { ++ if (c_delay > FALLBACK_DELAY) ++ *delay_for_event = FALLBACK_DELAY; ++ } else { ++ *delay_for_event = c_delay; ++ } ++ mdstat_close(); ++} ++ + static int make_daemon(char *pidfile) + { + /* Return: +@@ -1254,64 +1275,6 @@ static void free_statelist(struct state *statelist) + } + } + +-#ifndef NO_LIBUDEV +-/* function: check_udev_activity +- * Description: Function waits for udev to finish +- * events processing. 
+- * Returns: +- * 1 - detected error while opening udev +- * 2 - timeout +- * 0 - successfull completion +- */ +-static int check_udev_activity(void) +-{ +- struct udev *udev = NULL; +- struct udev_queue *udev_queue = NULL; +- int timeout_cnt = 30; +- int rc = 0; +- +- /* +- * In rare cases systemd may not have udevm, +- * in such cases just exit with rc 0 +- */ +- if (!use_udev()) +- goto out; +- +- udev = udev_new(); +- if (!udev) { +- rc = 1; +- goto out; +- } +- +- udev_queue = udev_queue_new(udev); +- if (!udev_queue) { +- rc = 1; +- goto out; +- } +- +- if (udev_queue_get_queue_is_empty(udev_queue)) +- goto out; +- +- while (!udev_queue_get_queue_is_empty(udev_queue)) { +- sleep(1); +- +- if (timeout_cnt) +- timeout_cnt--; +- else { +- rc = 2; +- goto out; +- } +- } +- +-out: +- if (udev_queue) +- udev_queue_unref(udev_queue); +- if (udev) +- udev_unref(udev); +- return rc; +-} +-#endif +- + /* Not really Monitor but ... */ + int Wait(char *dev) + { +diff --git a/lib.c b/lib.c +index 7ab59988..cf2701cd 100644 +--- a/lib.c ++++ b/lib.c +@@ -539,19 +539,6 @@ int check_env(char *name) + return 0; + } + +-int use_udev(void) +-{ +- static int use = -1; +- struct stat stb; +- +- if (use < 0) { +- use = ((stat("/dev/.udev", &stb) == 0 || +- stat("/run/udev", &stb) == 0) && +- check_env("MDADM_NO_UDEV") == 0); +- } +- return use; +-} +- + unsigned long GCD(unsigned long a, unsigned long b) + { + while (a != b) { +diff --git a/mdadm.h b/mdadm.h +index b48e6f86..9514cbe5 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -1656,6 +1656,7 @@ extern char *conf_line(FILE *file); + extern char *conf_word(FILE *file, int allow_key); + extern void print_quoted(char *str); + extern int use_udev(void); ++extern void print_escape(char *str); + extern unsigned long GCD(unsigned long a, unsigned long b); + extern int conf_name_is_free(char *name); + extern bool is_devname_ignore(const char *devname); +diff --git a/mdopen.c b/mdopen.c +index 3daa71f9..f9b04e1c 100644 +--- a/mdopen.c ++++ 
b/mdopen.c +@@ -23,6 +23,7 @@ + */ + + #include "mdadm.h" ++#include "udev.h" + #include "md_p.h" + #include + +@@ -176,7 +177,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, + char devnm[32]; + char cbuf[400]; + +- if (!use_udev()) ++ if (!udev_is_available()) + block_udev = 0; + + if (chosen == NULL) +@@ -384,7 +385,7 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, + * If we cannot detect udev, we need to make + * devices and links ourselves. + */ +- if (!use_udev()) { ++ if (!udev_is_available()) { + /* Make sure 'devname' exists and 'chosen' is a symlink to it */ + if (lstat(devname, &stb) == 0) { + /* Must be the correct device, else error */ +@@ -508,7 +509,7 @@ char *find_free_devnm(int use_partitions) + continue; + if (!conf_name_is_free(devnm)) + continue; +- if (!use_udev()) { ++ if (!udev_is_available()) { + /* make sure it is new to /dev too, at least as a + * non-standard */ + dev_t devid = devnm2devid(devnm); +diff --git a/udev.c b/udev.c +new file mode 100644 +index 00000000..2bac6921 +--- /dev/null ++++ b/udev.c +@@ -0,0 +1,150 @@ ++/* ++ * mdadm - manage Linux "md" devices aka RAID arrays. ++ * ++ * Copyright (C) 2022 Mateusz Grzonka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include "mdadm.h" ++#include "udev.h" ++#include "md_p.h" ++#include "md_u.h" ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * udev_is_available() - Checks for udev in the system. ++ * ++ * Function looks whether udev directories are available and MDADM_NO_UDEV env defined. ++ * ++ * Return: ++ * true if udev is available, ++ * false if not ++ */ ++bool udev_is_available(void) ++{ ++ struct stat stb; ++ ++ if (stat("/dev/.udev", &stb) != 0 && ++ stat("/run/udev", &stb) != 0) ++ return false; ++ if (check_env("MDADM_NO_UDEV") == 1) ++ return false; ++ return true; ++} ++ ++#ifndef NO_LIBUDEV ++ ++static struct udev *udev; ++static struct udev_monitor *udev_monitor; ++ ++/* ++ * udev_release() - Drops references of udev and udev_monitor. ++ */ ++static void udev_release(void) ++{ ++ udev_monitor_unref(udev_monitor); ++ udev_unref(udev); ++} ++ ++/* ++ * udev_initialize() - Initializes udev and udev_monitor structures. ++ * ++ * Function initializes udev, udev_monitor, and sets udev_monitor filter for block devices. 
++ * ++ * Return: ++ * UDEV_STATUS_SUCCESS on success ++ * UDEV_STATUS_ERROR on error ++ * UDEV_STATUS_ERROR_NO_UDEV when udev not available ++ */ ++static enum udev_status udev_initialize(void) ++{ ++ if (!udev_is_available()) { ++ pr_err("No udev.\n"); ++ return UDEV_STATUS_ERROR_NO_UDEV; ++ } ++ ++ udev = udev_new(); ++ if (!udev) { ++ pr_err("Cannot initialize udev.\n"); ++ return UDEV_STATUS_ERROR; ++ } ++ ++ udev_monitor = udev_monitor_new_from_netlink(udev, "udev"); ++ if (!udev_monitor) { ++ pr_err("Cannot initialize udev monitor.\n"); ++ udev = udev_unref(udev); ++ return UDEV_STATUS_ERROR; ++ } ++ ++ if (udev_monitor_filter_add_match_subsystem_devtype(udev_monitor, "block", 0) < 0) { ++ pr_err("Cannot add udev monitor event filter for md devices.\n"); ++ udev_release(); ++ return UDEV_STATUS_ERROR; ++ } ++ if (udev_monitor_enable_receiving(udev_monitor) < 0) { ++ pr_err("Cannot enable receiving udev events through udev monitor.\n"); ++ udev_release(); ++ return UDEV_STATUS_ERROR; ++ } ++ atexit(udev_release); ++ return UDEV_STATUS_SUCCESS; ++} ++ ++/* ++ * udev_wait_for_events() - Waits for events from udev. ++ * @seconds: Timeout in seconds. ++ * ++ * Function waits udev events, wakes up on event or timeout. 
++ * ++ * Return: ++ * UDEV_STATUS_SUCCESS on detected event ++ * UDEV_STATUS_TIMEOUT on timeout ++ * UDEV_STATUS_ERROR on error ++ */ ++enum udev_status udev_wait_for_events(int seconds) ++{ ++ int fd; ++ fd_set readfds; ++ struct timeval tv; ++ int ret; ++ ++ if (!udev || !udev_monitor) { ++ ret = udev_initialize(); ++ if (ret != UDEV_STATUS_SUCCESS) ++ return ret; ++ } ++ ++ fd = udev_monitor_get_fd(udev_monitor); ++ if (fd < 0) { ++ pr_err("Cannot access file descriptor associated with udev monitor.\n"); ++ return UDEV_STATUS_ERROR; ++ } ++ ++ FD_ZERO(&readfds); ++ FD_SET(fd, &readfds); ++ tv.tv_sec = seconds; ++ tv.tv_usec = 0; ++ ++ if (select(fd + 1, &readfds, NULL, NULL, &tv) > 0 && FD_ISSET(fd, &readfds)) ++ if (udev_monitor_receive_device(udev_monitor)) ++ return UDEV_STATUS_SUCCESS; /* event detected */ ++ return UDEV_STATUS_TIMEOUT; ++} ++#endif +diff --git a/udev.h b/udev.h +new file mode 100644 +index 00000000..33884861 +--- /dev/null ++++ b/udev.h +@@ -0,0 +1,37 @@ ++/* ++ * mdadm - manage Linux "md" devices aka RAID arrays. ++ * ++ * Copyright (C) 2022 Mateusz Grzonka ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#ifndef MONITOR_UDEV_H ++#define MONITOR_UDEV_H ++ ++enum udev_status { ++ UDEV_STATUS_ERROR_NO_UDEV = -2, ++ UDEV_STATUS_ERROR, ++ UDEV_STATUS_SUCCESS = 0, ++ UDEV_STATUS_TIMEOUT ++}; ++ ++bool udev_is_available(void); ++ ++#ifndef NO_LIBUDEV ++enum udev_status udev_wait_for_events(int seconds); ++#endif ++ ++#endif +-- +2.41.0 + diff --git a/0170-udev-Move-udev_block-and-udev_unblock-into-udev.c.patch b/0170-udev-Move-udev_block-and-udev_unblock-into-udev.c.patch new file mode 100644 index 0000000..9555dde --- /dev/null +++ b/0170-udev-Move-udev_block-and-udev_unblock-into-udev.c.patch @@ -0,0 +1,195 @@ +From 9f376da6439b07dc93ae084ab576e133b9d8d839 Mon Sep 17 00:00:00 2001 +From: Mateusz Grzonka +Date: Tue, 21 Nov 2023 01:58:24 +0100 +Subject: [PATCH 2/2] udev: Move udev_block() and udev_unblock() into udev.c + +Add kernel style comments and better error handling. + +Signed-off-by: Mateusz Grzonka +Signed-off-by: Kinga Tanska +Signed-off-by: Jes Sorensen +--- + Create.c | 1 + + lib.c | 29 ----------------------------- + mdadm.h | 2 -- + mdopen.c | 12 ++++++------ + udev.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ + udev.h | 3 +++ + 6 files changed, 54 insertions(+), 37 deletions(-) + +diff --git a/Create.c b/Create.c +index a280c7bc..ddd1a79b 100644 +--- a/Create.c ++++ b/Create.c +@@ -23,6 +23,7 @@ + */ + + #include "mdadm.h" ++#include "udev.h" + #include "md_u.h" + #include "md_p.h" + #include +diff --git a/lib.c b/lib.c +index cf2701cd..2b09293c 100644 +--- a/lib.c ++++ b/lib.c +@@ -204,35 +204,6 @@ char *fd2devnm(int fd) + return NULL; + } + +-/* When we create a new array, we don't want the content to +- * be immediately examined by udev - it is probably meaningless. 
+- * So create /run/mdadm/creating-mdXXX and expect that a udev +- * rule will noticed this and act accordingly. +- */ +-static char block_path[] = "/run/mdadm/creating-%s"; +-static char *unblock_path = NULL; +-void udev_block(char *devnm) +-{ +- int fd; +- char *path = NULL; +- +- xasprintf(&path, block_path, devnm); +- fd = open(path, O_CREAT|O_RDWR, 0600); +- if (fd >= 0) { +- close(fd); +- unblock_path = path; +- } else +- free(path); +-} +- +-void udev_unblock(void) +-{ +- if (unblock_path) +- unlink(unblock_path); +- free(unblock_path); +- unblock_path = NULL; +-} +- + /* + * convert a major/minor pair for a block device into a name in /dev, if possible. + * On the first call, walk /dev collecting name. +diff --git a/mdadm.h b/mdadm.h +index 9514cbe5..8dcd8b86 100644 +--- a/mdadm.h ++++ b/mdadm.h +@@ -1765,8 +1765,6 @@ extern char *fd2kname(int fd); + extern char *stat2devnm(struct stat *st); + bool stat_is_md_dev(struct stat *st); + extern char *fd2devnm(int fd); +-extern void udev_block(char *devnm); +-extern void udev_unblock(void); + + extern int in_initrd(void); + +diff --git a/mdopen.c b/mdopen.c +index f9b04e1c..eaa59b59 100644 +--- a/mdopen.c ++++ b/mdopen.c +@@ -336,8 +336,8 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, + devnm[0] = 0; + if (num < 0 && cname && ci->names) { + sprintf(devnm, "md_%s", cname); +- if (block_udev) +- udev_block(devnm); ++ if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS) ++ return -1; + if (!create_named_array(devnm)) { + devnm[0] = 0; + udev_unblock(); +@@ -345,8 +345,8 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, + } + if (num >= 0) { + sprintf(devnm, "md%d", num); +- if (block_udev) +- udev_block(devnm); ++ if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS) ++ return -1; + if (!create_named_array(devnm)) { + devnm[0] = 0; + udev_unblock(); +@@ -369,8 +369,8 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, + return -1; + } + } 
+- if (block_udev) +- udev_block(devnm); ++ if (block_udev && udev_block(devnm) != UDEV_STATUS_SUCCESS) ++ return -1; + create_named_array(devnm); + } + +diff --git a/udev.c b/udev.c +index 2bac6921..bc4722b0 100644 +--- a/udev.c ++++ b/udev.c +@@ -28,6 +28,8 @@ + #include + #include + ++static char *unblock_path; ++ + /* + * udev_is_available() - Checks for udev in the system. + * +@@ -148,3 +150,45 @@ enum udev_status udev_wait_for_events(int seconds) + return UDEV_STATUS_TIMEOUT; + } + #endif ++ ++/* ++ * udev_block() - Block udev from examining newly created arrays. ++ * ++ * When array is created, we don't want udev to examine it immediately. ++ * Function creates /run/mdadm/creating-mdXXX and expects that udev rule ++ * will notice it and act accordingly. ++ * ++ * Return: ++ * UDEV_STATUS_SUCCESS when successfully blocked udev ++ * UDEV_STATUS_ERROR on error ++ */ ++enum udev_status udev_block(char *devnm) ++{ ++ int fd; ++ char *path = xcalloc(1, BUFSIZ); ++ ++ snprintf(path, BUFSIZ, "/run/mdadm/creating-%s", devnm); ++ ++ fd = open(path, O_CREAT | O_RDWR, 0600); ++ if (!is_fd_valid(fd)) { ++ pr_err("Cannot block udev, error creating blocking file.\n"); ++ pr_err("%s: %s\n", strerror(errno), path); ++ free(path); ++ return UDEV_STATUS_ERROR; ++ } ++ ++ close(fd); ++ unblock_path = path; ++ return UDEV_STATUS_SUCCESS; ++} ++ ++/* ++ * udev_unblock() - Unblock udev. 
++ */ ++void udev_unblock(void) ++{ ++ if (unblock_path) ++ unlink(unblock_path); ++ free(unblock_path); ++ unblock_path = NULL; ++} +diff --git a/udev.h b/udev.h +index 33884861..ae0a3617 100644 +--- a/udev.h ++++ b/udev.h +@@ -34,4 +34,7 @@ bool udev_is_available(void); + enum udev_status udev_wait_for_events(int seconds); + #endif + ++enum udev_status udev_block(char *devnm); ++void udev_unblock(void); ++ + #endif +-- +2.41.0 + diff --git a/0171-mdadm-enable-sync-file-for-udev-rules.patch b/0171-mdadm-enable-sync-file-for-udev-rules.patch new file mode 100644 index 0000000..93e0b91 --- /dev/null +++ b/0171-mdadm-enable-sync-file-for-udev-rules.patch @@ -0,0 +1,197 @@ +From 8da27191aa62b08075d8e7ec36c14083f528eb89 Mon Sep 17 00:00:00 2001 +From: Nigel Croxon +Date: Fri, 4 Apr 2025 08:44:47 -0400 +Subject: [PATCH 1/1] mdadm: enable sync file for udev rules + +Mounting an md device may fail during boot from mdadm's claim +on the device not being released before systemd attempts to mount. + +In this case it was found that essentially there is a race condition +occurring in which the mount cannot happen without some kind of delay +being added BEFORE the mount itself triggers, or manual intervention +after a timeout. + +The findings: +the inode was for a tmp block node made by mdadm for md0. 
+ +crash> detailedsearch ff1b0c398ff28380 +ff1b0c398f079720: ff1b0c398ff28380 slab:filp state:alloc + obj:ff1b0c398f079700 size:256 +ff1b0c398ff284f8: ff1b0c398ff28380 slab:shmem_inode_cache + state:alloc obj:ff1b0c398ff28308 size:768 + +crash> struct file.f_inode,f_path ff1b0c398f079700 +f_inode = 0xff1b0c398ff28380, +f_path = { +mnt = 0xff1b0c594aecc7a0, +dentry = 0xff1b0c3a8c614f00 +}, +crash> struct dentry.d_name 0xff1b0c3a8c614f00 +d_name = { +{ +{ hash = 3714992780, len = 16 }, +hash_len = 72434469516 +}, +name = 0xff1b0c3a8c614f38 ".tmp.md.1454:9:0" +}, + +For the race condition, mdadm and udev have some infrastructure for making +the device be ignored while under construction. e.g. + +$ cat lib/udev/rules.d/01-md-raid-creating.rules + +do not edit this file, it will be overwritten on update +While mdadm is creating an array, it creates a file +/run/mdadm/creating-mdXXX. If that file exists, then +the array is not "ready" and we should make sure the +content is ignored. +KERNEL=="md*", TEST=="/run/mdadm/creating-$kernel", ENV{SYSTEMD_READY}="0" + +However, this feature currently is only used by the mdadm create command. +See calls to udev_block/udev_unblock in the mdadm code as to where and when +this behavior is used. Any md array being started by incremental or +normal assemble commands does not use this udev integration. So assembly +of an existing array does not look to have any explicit protection from +systemd/udev seeing an array as in a usable state before an mdadm instance +with O_EXCL closes its file handle. +This is for the sake of showing the use case for such an option and why +it would be helpful to delay the mount itself. + +While mdadm is still constructing the array mdadm --incremental +that is called from within /usr/lib/udev/rules.d/64-md-raid-assembly.rules, +there is an attempt to mount the md device, but there is not a creation +of "/run/mdadm/creating-xxx" file when in incremental mode that +the rule is looking for. 
Therefore the device is not marked +as SYSTEMD_READY=0 in +"/usr/lib/udev/rules.d/01-md-raid-creating.rules" and missing +synchronization using the "/run/mdadm/creating-xxx" file. + +As to this change affecting containers or IMSM... +(container's array state is inactive all the time) + +Even if the "array_state" reports "inactive" when previous components +are added, the mdadm call for the very last array component that makes +it usable/ready, still needs to be synced properly - mdadm needs to drop +the claim first calling "close", then delete the "/run/mdadm/creating-xxx". +Then lets the udev know it is clear to act now (the "udev_unblock" in +mdadm code that generates a synthetic udev event so the rules are +reevaluated). It's this processing of the very last array component +that is the issue here (which is not IO error, but it is that trying to +open the dev returns -EBUSY because of the exclusive claim that mdadm +still holds while the mdadm device is being processed already by udev in +parallel, and that is what the +/run/mdadm/creating-xxx should prevent exactly). + +The patch to Incremental.c is to enable creating the +"/run/mdadm/creating-xxx" file during incremental mode. + +For the change to Create.c, the unlink is called right before dropping +the exclusive claim for the device. This should be the other way round +to avoid the race 100%. That is, if there's a "close" call and +"udev_unblock" call, the "close" should go first, then followed by +"udev_unblock".
+ +Signed-off-by: Nigel Croxon +--- + Create.c | 2 +- + Incremental.c | 20 +++++++++++++++----- + 2 files changed, 16 insertions(+), 6 deletions(-) + +diff --git mdadm-4.2/Create.c mdadm-4.2-fix/Create.c +--- mdadm-4.2/Create.c 2025-06-05 06:17:32.656879914 -0400 ++++ mdadm-4.2-fix/Create.c 2025-06-05 07:40:05.583074551 -0400 +@@ -1321,8 +1321,8 @@ + } else { + pr_err("not starting array - not enough devices.\n"); + } +- udev_unblock(); + close(mdfd); ++ udev_unblock(); + sysfs_uevent(&info, "change"); + return 0; + +diff --git mdadm-4.2/Incremental.c mdadm-4.2-fix/Incremental.c +--- mdadm-4.2/Incremental.c 2025-06-05 06:17:32.789874082 -0400 ++++ mdadm-4.2-fix/Incremental.c 2025-06-05 07:44:27.126170772 -0400 +@@ -29,6 +29,7 @@ + */ + + #include "mdadm.h" ++#include "udev.h" + #include + #include + #include +@@ -296,7 +297,7 @@ + + /* Couldn't find an existing array, maybe make a new one */ + mdfd = create_mddev(match ? match->devname : NULL, +- name_to_use, c->autof, trustworthy, chosen_name, 0); ++ name_to_use, c->autof, trustworthy, chosen_name, 1); + + if (mdfd < 0) + goto out_unlock; +@@ -474,7 +475,6 @@ + if (is_container(info.array.level)) { + char devnm[32]; + /* Try to assemble within the container */ +- sysfs_uevent(sra, "change"); + if (!c->export && c->verbose >= 0) + pr_err("container %s now has %d device%s\n", + chosen_name, info.array.working_disks, +@@ -486,6 +486,8 @@ + if (st->ss->load_container) + rv = st->ss->load_container(st, mdfd, NULL); + close(mdfd); ++ udev_unblock(); ++ sysfs_uevent(sra, "change"); + sysfs_free(sra); + if (!rv) + rv = Incremental_container(st, chosen_name, c, NULL); +@@ -494,6 +496,7 @@ + * so that it can eg. 
try to rebuild degraded array */ + if (st->ss->external) + ping_monitor(devnm); ++ udev_unblock(); + return rv; + } + +@@ -630,7 +633,11 @@ + close(mdfd); + if (policy) + dev_policy_free(policy); +- sysfs_free(sra); ++ udev_unblock(); ++ if (sra) { ++ sysfs_uevent(sra, "change"); ++ sysfs_free(sra); ++ } + return rv; + out_unlock: + map_unlock(&map); +@@ -1577,7 +1584,7 @@ + ra->name, + c->autof, + trustworthy, +- chosen_name, 0); ++ chosen_name, 1); + + if (!is_fd_valid(mdfd)) { + pr_err("create_mddev failed with chosen name %s: %s.\n", +@@ -1597,6 +1604,8 @@ + map_free(map); + map = NULL; + close_fd(&mdfd); ++ udev_unblock(); ++ sysfs_uevent(&info, "change"); + } + if (c->export && result) { + char sep = '='; +@@ -1623,6 +1632,8 @@ + release: + map_free(map); + sysfs_free(list); ++ udev_unblock(); ++ sysfs_uevent(&info, "change"); + return rv; + } + diff --git a/mdadm.spec b/mdadm.spec index c5b6e80..377eca2 100644 --- a/mdadm.spec +++ b/mdadm.spec @@ -2,7 +2,7 @@ Summary: The mdadm program controls Linux md devices (software RAID arrays) Name: mdadm Version: 4.2 # extraversion is used to define rhel internal version -%define extraversion 16 +%define extraversion 17 Release: %{extraversion}%{?dist} Source: http://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}%{?subversion:-%{subversion}}.tar.xz Source1: mdmonitor.init @@ -182,6 +182,9 @@ Patch164: 0165-Fix-assembling-RAID-volume-by-using-incremental.patch Patch165: 0166-Revert-mdadm-remove-container_enough-logic.patch Patch166: 0167-manage-adjust-checking-subarray-state-in-update_suba.patch Patch167: 0168-super1-remove-support-for-name-in-config.patch +Patch168: 0169-Mdmonitor-Improve-udev-event-handling.patch +Patch169: 0170-udev-Move-udev_block-and-udev_unblock-into-udev.c.patch +Patch170: 0171-mdadm-enable-sync-file-for-udev-rules.patch # RHEL customization patches Patch200: mdadm-udev.patch @@ -276,6 +279,10 @@ rm -rf %{buildroot} /usr/share/mdadm/mdcheck %changelog +* Thu Jun 5 2025 Xiao Ni - 
4.2-17 +- enable sync file for udev rules +- Resolves: RHEL-59180 + * Mon Oct 28 2024 Xiao Ni - 4.2-16 - Remove name= support in config file - Resolves RHEL-45608