119 lines
4.1 KiB
Diff
119 lines
4.1 KiB
Diff
|
From 3cbe13403ec0c78374343dcd889609aefe791f9b Mon Sep 17 00:00:00 2001
|
||
|
From: Shminderjit Singh <shminderjit.singh@oracle.com>
|
||
|
Date: Mon, 24 Jun 2024 08:58:51 +0000
|
||
|
Subject: [PATCH 111/157] mdadm: Fix socket connection failure when mdmon runs
|
||
|
in foreground mode.
|
||
|
|
||
|
While creating an IMSM RAID, mdadm will wait for the mdmon main process
|
||
|
to finish if mdmon runs in forking mode. This is because with
|
||
|
"Type=forking" in the mdmon service unit file, "systemctl start service"
|
||
|
will block until the main process of mdmon exits. At that moment, mdmon
|
||
|
has already created the socket, so the subsequent socket connect from
|
||
|
mdadm will succeed.
|
||
|
|
||
|
However, when mdmon runs in foreground mode (without "Type=forking" in
|
||
|
the service unit file), "systemctl start service" will return once the
|
||
|
mdmon process starts. This causes mdadm and mdmon to run in parallel,
|
||
|
which may lead to a socket connection failure since mdmon has not yet
|
||
|
initialized the socket when mdadm tries to connect. If the next
|
||
|
instruction/command is to access this device and try to write to it, a
|
||
|
permission error will occur since mdmon has not yet set the array to RW
|
||
|
mode.
|
||
|
|
||
|
Signed-off-by: Shminderjit Singh <shminderjit.singh@oracle.com>
|
||
|
---
|
||
|
Create.c | 6 ++++--
|
||
|
mdadm.h | 1 +
|
||
|
util.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
|
||
|
3 files changed, 50 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/Create.c b/Create.c
|
||
|
index bd4875e4..479c2715 100644
|
||
|
--- a/Create.c
|
||
|
+++ b/Create.c
|
||
|
@@ -1344,9 +1344,11 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs,
|
||
|
if (c->verbose >= 0)
|
||
|
pr_info("array %s started.\n", chosen_name);
|
||
|
if (st->ss->external && st->container_devnm[0]) {
|
||
|
- if (need_mdmon)
|
||
|
+ if (need_mdmon) {
|
||
|
start_mdmon(st->container_devnm);
|
||
|
-
|
||
|
+ if (wait_for_mdmon_control_socket(st->container_devnm) != MDADM_STATUS_SUCCESS)
|
||
|
+ goto abort;
|
||
|
+ }
|
||
|
ping_monitor(st->container_devnm);
|
||
|
close(container_fd);
|
||
|
}
|
||
|
diff --git a/mdadm.h b/mdadm.h
|
||
|
index e9f764a2..27009154 100644
|
||
|
--- a/mdadm.h
|
||
|
+++ b/mdadm.h
|
||
|
@@ -1776,6 +1776,7 @@ extern int is_subarray_active(char *subarray, char *devname);
|
||
|
extern int open_subarray(char *dev, char *subarray, struct supertype *st, int quiet);
|
||
|
extern struct superswitch *version_to_superswitch(char *vers);
|
||
|
|
||
|
+extern mdadm_status_t wait_for_mdmon_control_socket(const char *container_devnm);
|
||
|
extern int mdmon_running(const char *devnm);
|
||
|
extern int mdmon_pid(const char *devnm);
|
||
|
extern mdadm_status_t wait_for_mdmon(const char *devnm);
|
||
|
diff --git a/util.c b/util.c
|
||
|
index 48c97545..908f8430 100644
|
||
|
--- a/util.c
|
||
|
+++ b/util.c
|
||
|
@@ -1932,6 +1932,51 @@ int mdmon_running(const char *devnm)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/*
|
||
|
+ * wait_for_mdmon_control_socket() - Waits for mdmon control socket
|
||
|
+ * to be created within the specified time.
|
||
|
+ * @container_devnm: Container device whose mdmon control socket is awaited.
|
||
|
+ *
|
||
|
+ * In foreground mode, when mdadm is trying to connect to control
|
||
|
+ * socket it is possible that the mdmon has not created it yet.
|
||
|
+ * Give mdmon some time to create the socket. Timeout is set to 2 sec.
|
||
|
+ *
|
||
|
+ * Return: MDADM_STATUS_SUCCESS if connect succeeds, otherwise return
|
||
|
+ * error code.
|
||
|
+ */
|
||
|
+mdadm_status_t wait_for_mdmon_control_socket(const char *container_devnm)
|
||
|
+{
|
||
|
+ enum mdadm_status status = MDADM_STATUS_SUCCESS;
|
||
|
+ int sfd, rv, retry_count = 0;
|
||
|
+ struct sockaddr_un addr;
|
||
|
+ char path[PATH_MAX];
|
||
|
+
|
||
|
+ snprintf(path, PATH_MAX, "%s/%s.sock", MDMON_DIR, container_devnm);
|
||
|
+ sfd = socket(PF_LOCAL, SOCK_STREAM, 0);
|
||
|
+ if (!is_fd_valid(sfd))
|
||
|
+ return MDADM_STATUS_ERROR;
|
||
|
+
|
||
|
+ addr.sun_family = PF_LOCAL;
|
||
|
+ strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
|
||
|
+ addr.sun_path[sizeof(addr.sun_path) - 1] = '\0';
|
||
|
+
|
||
|
+ for (retry_count = 0; retry_count < 10; retry_count++) {
|
||
|
+ rv = connect(sfd, (struct sockaddr*)&addr, sizeof(addr));
|
||
|
+ if (rv < 0) {
|
||
|
+ sleep_for(0, MSEC_TO_NSEC(200), true);
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (rv < 0) {
|
||
|
+ pr_err("Failed to connect to control socket.\n");
|
||
|
+ status = MDADM_STATUS_ERROR;
|
||
|
+ }
|
||
|
+ close(sfd);
|
||
|
+ return status;
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
* wait_for_mdmon() - Waits for mdmon within specified time.
|
||
|
* @devnm: Device for which mdmon should start.
|
||
|
--
|
||
|
2.41.0
|
||
|
|