59 lines
1.8 KiB
Diff
59 lines
1.8 KiB
Diff
|
From d2e11da4b7fd0453e942f43e4196dc63b3dbd708 Mon Sep 17 00:00:00 2001
|
||
|
From: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
||
|
Date: Fri, 22 Feb 2019 13:30:27 +0100
|
||
|
Subject: [RHEL7.7 PATCH 19/24] mdmon: wait for previous mdmon to exit during
|
||
|
takeover
|
||
|
|
||
|
Since the patch c76242c5("mdmon: get safe mode delay file descriptor
|
||
|
early"), safe_mode_delay is set properly by initrd mdmon. But in some
|
||
|
cases with filesystem traffic since the very start of the system, it
|
||
|
might take a while to transition to a clean state. Because the new
|
||
|
mdmon does not wait for the old one to exit - it might happen that the
|
||
|
new one switches safe_mode_delay back to seconds before the old one exits.
|
||
|
As a result, two mdmons end up running concurrently on the same array.
|
||
|
|
||
|
Wait for the old mdmon to exit, pinging it with the SIGUSR1 signal just
|
||
|
in case it is sleeping.
|
||
|
|
||
|
Signed-off-by: Pawel Baldysiak <pawel.baldysiak@intel.com>
|
||
|
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
||
|
---
|
||
|
mdmon.c | 14 +++++++++++---
|
||
|
1 file changed, 11 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/mdmon.c b/mdmon.c
|
||
|
index 0955fcc..ff985d2 100644
|
||
|
--- a/mdmon.c
|
||
|
+++ b/mdmon.c
|
||
|
@@ -171,6 +171,7 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
||
|
int fd;
|
||
|
int n;
|
||
|
long fl;
|
||
|
+ int rv;
|
||
|
|
||
|
/* first rule of survival... don't off yourself */
|
||
|
if (pid == getpid())
|
||
|
@@ -201,9 +202,16 @@ static void try_kill_monitor(pid_t pid, char *devname, int sock)
|
||
|
fl &= ~O_NONBLOCK;
|
||
|
fcntl(sock, F_SETFL, fl);
|
||
|
n = read(sock, buf, 100);
|
||
|
- /* Ignore result, it is just the wait that
|
||
|
- * matters
|
||
|
- */
|
||
|
+
|
||
|
+ /* If there is I/O going on it might took some time to get to
|
||
|
+ * clean state. Wait for monitor to exit fully to avoid races.
|
||
|
+ * Ping it with SIGUSR1 in case that it is sleeping */
|
||
|
+ for (n = 0; n < 25; n++) {
|
||
|
+ rv = kill(pid, SIGUSR1);
|
||
|
+ if (rv < 0)
|
||
|
+ break;
|
||
|
+ usleep(200000);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
void remove_pidfile(char *devname)
|
||
|
--
|
||
|
2.7.5
|
||
|
|