From 66a54b266f6c579e5f37b6253820903a55c3346c Mon Sep 17 00:00:00 2001 From: Shminderjit Singh Date: Tue, 4 Jun 2024 07:46:03 +0000 Subject: [PATCH 105/157] mdadm: Fix socket connection failure when mdmon runs in foreground mode. While creating an IMSM RAID, mdadm will wait for the mdmon main process to finish if mdmon runs in forking mode. This is because with "Type=forking" in the mdmon service unit file, "systemctl start service" will block until the main process of mdmon exits. At that moment, mdmon has already created the socket, so the subsequent socket connect from mdadm will succeed. However, when mdmon runs in foreground mode (without "Type=forking" in the service unit file), "systemctl start service" will return once the mdmon process starts. This causes mdadm and mdmon to run in parallel, which may lead to a socket connection failure since mdmon has not yet initialized the socket when mdadm tries to connect. If the next instruction/command is to access this device and try to write to it, a permission error will occur since mdmon has not yet set the array to RW mode. Signed-off-by: Shminderjit Singh --- msg.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/msg.c b/msg.c index ba0e25be..d17f679d 100644 --- a/msg.c +++ b/msg.c @@ -151,6 +151,7 @@ int connect_monitor(char *devname) struct sockaddr_un addr; int pos; char *c; + int rv, retry_count = 0; pos = sprintf(path, "%s/", MDMON_DIR); if (is_subarray(devname)) { @@ -170,7 +171,24 @@ int connect_monitor(char *devname) addr.sun_family = PF_LOCAL; strcpy(addr.sun_path, path); - if (connect(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { + + /* In foreground mode, when mdadm is trying to connect to control + * socket it is possible that the mdmon has not created it yet. + * Give some time to mdmon to create socket. + */ + for (retry_count = 0; retry_count < 10; retry_count++) { + rv = connect(sfd, (struct sockaddr*)&addr, sizeof(addr)); + + if (rv < 0) { + sleep_for(0, MSEC_TO_NSEC(200), true); + continue; + } + break; + } + + if (rv < 0) { + pr_err("Failed to connect to control socket. (%s!!)\n", + strerror(errno)); close(sfd); return -1; } -- 2.41.0