glib2/3353.patch
Michael Catanzaro 026c2e2b3d Fix race with waitpid() and child watcher sources
Resolves: RHEL-14761
2023-11-03 13:56:34 -05:00

1079 lines
35 KiB
Diff
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 68f0cd7f3d351da1f011a65e080d8e3f26a31525 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Tue, 28 Mar 2023 14:38:08 +0200
Subject: [PATCH 1/7] gmain: unify win/unix implementations for child watcher
Let's move the difference between the win/unix implementations closer to
where the difference is. Thereby, we easier see the two implementations
side by side. Splitting it at a higher layer makes the code harder to
read.
This is just a preparation for what comes next.
---
glib/gmain.c | 192 ++++++++++++++++++++++++---------------------------
1 file changed, 92 insertions(+), 100 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index f0cf700c0..228737efa 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -446,6 +446,11 @@ static gboolean g_child_watch_dispatch (GSource *source,
GSourceFunc callback,
gpointer user_data);
static void g_child_watch_finalize (GSource *source);
+
+#ifndef G_OS_WIN32
+static void unref_unix_signal_handler_unlocked (int signum);
+#endif
+
#ifdef G_OS_UNIX
static void g_unix_signal_handler (int signum);
static gboolean g_unix_signal_watch_prepare (GSource *source,
@@ -5214,17 +5219,49 @@ g_timeout_add_seconds (guint interval,
/* Child watch functions */
-#ifdef G_OS_WIN32
+#ifdef HAVE_PIDFD
+static int
+siginfo_t_to_wait_status (const siginfo_t *info)
+{
+ /* Each of these returns is essentially the inverse of WIFEXITED(),
+ * WIFSIGNALED(), etc. */
+ switch (info->si_code)
+ {
+ case CLD_EXITED:
+ return W_EXITCODE (info->si_status, 0);
+ case CLD_KILLED:
+ return W_EXITCODE (0, info->si_status);
+ case CLD_DUMPED:
+ return W_EXITCODE (0, info->si_status | WCOREFLAG);
+ case CLD_CONTINUED:
+ return __W_CONTINUED;
+ case CLD_STOPPED:
+ case CLD_TRAPPED:
+ default:
+ return W_STOPCODE (info->si_status);
+ }
+}
+#endif /* HAVE_PIDFD */
static gboolean
g_child_watch_prepare (GSource *source,
gint *timeout)
{
+#ifdef G_OS_WIN32
*timeout = -1;
return FALSE;
+#else /* G_OS_WIN32 */
+ {
+ GChildWatchSource *child_watch_source;
+
+ child_watch_source = (GChildWatchSource *) source;
+
+ return g_atomic_int_get (&child_watch_source->child_exited);
+ }
+#endif /* G_OS_WIN32 */
}
-static gboolean
+static gboolean
g_child_watch_check (GSource *source)
{
GChildWatchSource *child_watch_source;
@@ -5232,6 +5269,7 @@ g_child_watch_check (GSource *source)
child_watch_source = (GChildWatchSource *) source;
+#ifdef G_OS_WIN32
child_exited = child_watch_source->poll.revents & G_IO_IN;
if (child_exited)
@@ -5246,15 +5284,45 @@ g_child_watch_check (GSource *source)
*/
if (!GetExitCodeProcess (child_watch_source->pid, &child_status))
{
- gchar *emsg = g_win32_error_message (GetLastError ());
- g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
- g_free (emsg);
+ gchar *emsg = g_win32_error_message (GetLastError ());
+ g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
+ g_free (emsg);
- child_watch_source->child_status = -1;
- }
+ child_watch_source->child_status = -1;
+ }
else
- child_watch_source->child_status = child_status;
+ child_watch_source->child_status = child_status;
}
+#else /* G_OS_WIN32 */
+#ifdef HAVE_PIDFD
+ if (child_watch_source->using_pidfd)
+ {
+ child_exited = child_watch_source->poll.revents & G_IO_IN;
+
+ if (child_exited)
+ {
+ siginfo_t child_info = { 0, };
+
+ /* Get the exit status */
+ if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
+ child_info.si_pid != 0)
+ {
+ /* waitid() helpfully provides the wait status in a decomposed
+ * form which is quite useful. Unfortunately we have to report it
+ * to the #GChildWatchFunc as a waitpid()-style platform-specific
+ * wait status, so that the user code in #GChildWatchFunc can then
+ * call WIFEXITED() (etc.) on it. That means re-composing the
+ * status information. */
+ child_watch_source->child_status = siginfo_t_to_wait_status (&child_info);
+ child_watch_source->child_exited = TRUE;
+ }
+ }
+
+ return child_exited;
+ }
+#endif /* HAVE_PIDFD */
+ child_exited = g_atomic_int_get (&child_watch_source->child_exited);
+#endif /* G_OS_WIN32 */
return child_exited;
}
@@ -5262,9 +5330,24 @@ g_child_watch_check (GSource *source)
static void
g_child_watch_finalize (GSource *source)
{
+#ifndef G_OS_WIN32
+ GChildWatchSource *child_watch_source = (GChildWatchSource *) source;
+
+ if (child_watch_source->using_pidfd)
+ {
+ if (child_watch_source->poll.fd >= 0)
+ close (child_watch_source->poll.fd);
+ return;
+ }
+
+ G_LOCK (unix_signal_lock);
+ unix_child_watches = g_slist_remove (unix_child_watches, source);
+ unref_unix_signal_handler_unlocked (SIGCHLD);
+ G_UNLOCK (unix_signal_lock);
+#endif /* G_OS_WIN32 */
}
-#else /* G_OS_WIN32 */
+#ifndef G_OS_WIN32
static void
wake_source (GSource *source)
@@ -5397,79 +5480,6 @@ dispatch_unix_signals (void)
G_UNLOCK(unix_signal_lock);
}
-static gboolean
-g_child_watch_prepare (GSource *source,
- gint *timeout)
-{
- GChildWatchSource *child_watch_source;
-
- child_watch_source = (GChildWatchSource *) source;
-
- return g_atomic_int_get (&child_watch_source->child_exited);
-}
-
-#ifdef HAVE_PIDFD
-static int
-siginfo_t_to_wait_status (const siginfo_t *info)
-{
- /* Each of these returns is essentially the inverse of WIFEXITED(),
- * WIFSIGNALED(), etc. */
- switch (info->si_code)
- {
- case CLD_EXITED:
- return W_EXITCODE (info->si_status, 0);
- case CLD_KILLED:
- return W_EXITCODE (0, info->si_status);
- case CLD_DUMPED:
- return W_EXITCODE (0, info->si_status | WCOREFLAG);
- case CLD_CONTINUED:
- return __W_CONTINUED;
- case CLD_STOPPED:
- case CLD_TRAPPED:
- default:
- return W_STOPCODE (info->si_status);
- }
-}
-#endif /* HAVE_PIDFD */
-
-static gboolean
-g_child_watch_check (GSource *source)
-{
- GChildWatchSource *child_watch_source;
-
- child_watch_source = (GChildWatchSource *) source;
-
-#ifdef HAVE_PIDFD
- if (child_watch_source->using_pidfd)
- {
- gboolean child_exited = child_watch_source->poll.revents & G_IO_IN;
-
- if (child_exited)
- {
- siginfo_t child_info = { 0, };
-
- /* Get the exit status */
- if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
- child_info.si_pid != 0)
- {
- /* waitid() helpfully provides the wait status in a decomposed
- * form which is quite useful. Unfortunately we have to report it
- * to the #GChildWatchFunc as a waitpid()-style platform-specific
- * wait status, so that the user code in #GChildWatchFunc can then
- * call WIFEXITED() (etc.) on it. That means re-composing the
- * status information. */
- child_watch_source->child_status = siginfo_t_to_wait_status (&child_info);
- child_watch_source->child_exited = TRUE;
- }
- }
-
- return child_exited;
- }
-#endif /* HAVE_PIDFD */
-
- return g_atomic_int_get (&child_watch_source->child_exited);
-}
-
static gboolean
g_unix_signal_watch_prepare (GSource *source,
gint *timeout)
@@ -5648,24 +5658,6 @@ g_unix_signal_watch_finalize (GSource *source)
G_UNLOCK (unix_signal_lock);
}
-static void
-g_child_watch_finalize (GSource *source)
-{
- GChildWatchSource *child_watch_source = (GChildWatchSource *) source;
-
- if (child_watch_source->using_pidfd)
- {
- if (child_watch_source->poll.fd >= 0)
- close (child_watch_source->poll.fd);
- return;
- }
-
- G_LOCK (unix_signal_lock);
- unix_child_watches = g_slist_remove (unix_child_watches, source);
- unref_unix_signal_handler_unlocked (SIGCHLD);
- G_UNLOCK (unix_signal_lock);
-}
-
#endif /* G_OS_WIN32 */
static gboolean
--
2.41.0
From 984e04a77b29c60f66fd60b1f9690e6ff7880574 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Tue, 28 Mar 2023 15:44:30 +0200
Subject: [PATCH 2/7] gmain: simplify handling child watchers in
dispatch_unix_signals_unlocked()
- if a child watch source has "using_pidfd", it is never linked in the
unix_child_watches list. Drop that check.
- replace the deep nested if, with an early "continue" in the loop,
if we detect there is nothing to do. It makes the code easier to
read.
---
glib/gmain.c | 39 +++++++++++++++++++--------------------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index 228737efa..8a030a409 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -5430,31 +5430,30 @@ dispatch_unix_signals_unlocked (void)
for (node = unix_child_watches; node; node = node->next)
{
GChildWatchSource *source = node->data;
+ pid_t pid;
- if (!source->using_pidfd &&
- !g_atomic_int_get (&source->child_exited))
+ if (g_atomic_int_get (&source->child_exited))
+ continue;
+
+ do
{
- pid_t pid;
- do
- {
- g_assert (source->pid > 0);
+ g_assert (source->pid > 0);
- pid = waitpid (source->pid, &source->child_status, WNOHANG);
- if (pid > 0)
- {
- g_atomic_int_set (&source->child_exited, TRUE);
- wake_source ((GSource *) source);
- }
- else if (pid == -1 && errno == ECHILD)
- {
- g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
- source->child_status = 0;
- g_atomic_int_set (&source->child_exited, TRUE);
- wake_source ((GSource *) source);
- }
+ pid = waitpid (source->pid, &source->child_status, WNOHANG);
+ if (pid > 0)
+ {
+ g_atomic_int_set (&source->child_exited, TRUE);
+ wake_source ((GSource *) source);
+ }
+ else if (pid == -1 && errno == ECHILD)
+ {
+ g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
+ source->child_status = 0;
+ g_atomic_int_set (&source->child_exited, TRUE);
+ wake_source ((GSource *) source);
}
- while (pid == -1 && errno == EINTR);
}
+ while (pid == -1 && errno == EINTR);
}
}
--
2.41.0
From aaac91a86288e103f7f413b9d075acc803178da9 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Wed, 17 May 2023 08:04:02 +0200
Subject: [PATCH 3/7] gmain: remove unnecessary initialization of
source_timeout in g_main_context_prepare_unlocked()
Note that the variable source_timeout is already initialized upon
definition, at the beginning of the block.
It's easy to see, that no code changes the variable between the variable
definition, and the place where it was initialized. It was thus
unnecessary.
It's not about dropping the unnecessary code (the compiler could do that
just fine too). It's that there is the other branch of the "if/else", where
the variable is also not initialized. But the other branch also requires
that the variable is in fact initialized to -1, because prepare()
callbacks are free not to explicitly set the output value. So both
branches require the variable to be initialized to -1, but only one of
them did. This poses unnecessary questions about whether anything is
wrong. Avoid that by dropping the redundant code.
---
glib/gmain.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index 8a030a409..28fbcc015 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -3695,10 +3695,7 @@ g_main_context_prepare (GMainContext *context,
context->in_check_or_prepare--;
}
else
- {
- source_timeout = -1;
- result = FALSE;
- }
+ result = FALSE;
if (result == FALSE && source->priv->ready_time != -1)
{
--
2.41.0
From b71ae65f14843cc09819e5b482667e5c941a27af Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Wed, 17 May 2023 08:15:16 +0200
Subject: [PATCH 4/7] gmain: remove unnecessary initialization of *timeout in
prepare() callbacks
Note that the prepare callback only has one caller, which pre-initializes
the timeout argument to -1. That may be an implementation detail and not
publicly promised, but it wouldn't make sense to do it any other way in
the caller.
Also, note that g_unix_signal_watch_prepare() and the UNIX branch of
g_child_watch_prepare() already relied on that.
---
gio/gsocket.c | 2 --
glib/giounix.c | 2 --
glib/giowin32.c | 2 --
glib/gmain.c | 1 -
glib/tests/mainloop.c | 2 ++
5 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/gio/gsocket.c b/gio/gsocket.c
index f39a568b3..c624eb1ae 100644
--- a/gio/gsocket.c
+++ b/gio/gsocket.c
@@ -3955,8 +3955,6 @@ socket_source_prepare (GSource *source,
{
GSocketSource *socket_source = (GSocketSource *)source;
- *timeout = -1;
-
#ifdef G_OS_WIN32
if ((socket_source->pollfd.revents & G_IO_NVAL) != 0)
return TRUE;
diff --git a/glib/giounix.c b/glib/giounix.c
index b86d79db7..94b33253f 100644
--- a/glib/giounix.c
+++ b/glib/giounix.c
@@ -129,8 +129,6 @@ g_io_unix_prepare (GSource *source,
GIOUnixWatch *watch = (GIOUnixWatch *)source;
GIOCondition buffer_condition = g_io_channel_get_buffer_condition (watch->channel);
- *timeout = -1;
-
/* Only return TRUE here if _all_ bits in watch->condition will be set
*/
return ((watch->condition & buffer_condition) == watch->condition);
diff --git a/glib/giowin32.c b/glib/giowin32.c
index b0b6c3d85..e4b171b0d 100644
--- a/glib/giowin32.c
+++ b/glib/giowin32.c
@@ -707,8 +707,6 @@ g_io_win32_prepare (GSource *source,
GIOWin32Channel *channel = (GIOWin32Channel *)watch->channel;
int event_mask;
- *timeout = -1;
-
if (channel->debug)
g_print ("g_io_win32_prepare: source=%p channel=%p", source, channel);
diff --git a/glib/gmain.c b/glib/gmain.c
index 28fbcc015..aec04314c 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -5245,7 +5245,6 @@ g_child_watch_prepare (GSource *source,
gint *timeout)
{
#ifdef G_OS_WIN32
- *timeout = -1;
return FALSE;
#else /* G_OS_WIN32 */
{
diff --git a/glib/tests/mainloop.c b/glib/tests/mainloop.c
index d43b2cf08..a7c5b33d1 100644
--- a/glib/tests/mainloop.c
+++ b/glib/tests/mainloop.c
@@ -34,6 +34,8 @@ cb (gpointer data)
static gboolean
prepare (GSource *source, gint *time)
{
+ g_assert_nonnull (time);
+ g_assert_cmpint (*time, ==, -1);
return FALSE;
}
static gboolean
--
2.41.0
From ccbebd3bd2d9163da3e2ad7f213ded04cd92136d Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Tue, 28 Mar 2023 14:30:58 +0200
Subject: [PATCH 5/7] gmain: fix race with waitpid() and child watcher sources
GChildWatchSource uses waitpid(), among pidfd and GetExitCodeProcess().
It thus only works for child processes which the user must ensure to
exist and not being reaped yet. Also, the user must not kill() the PID
after the child process is reaped and must not race kill() against
waitpid(). Also, the user must not call waitpid()/kill() after the child
process is reaped.
Previously, GChildWatchSource would call waitpid() already when adding
the source (g_child_watch_source_new()) and from the worker thread
(dispatch_unix_signals_unlocked()). That is racy:
- if a child watcher is attached and did not yet fire, you cannot call
kill() on the PID without racing against the PID being reaped on the
worker thread. That would then lead to ESRCH or even worse, killing
the wrong process.
- if you g_source_destroy() the source that didn't fire yet, the user
doesn't know whether the PID was reaped in the background. Any
subsequent kill()/waitpid() may fail with ESRCH/ECHILD or even address
the wrong process.
The race is most visible on Unix without pidfd support, because then the
process gets reaped on the worker thread or during g_child_watch_source_new().
But it's also with Windows and pidfd, because we would have waited for
the process in g_child_watch_check(), where other callbacks could fire
between reaping the process status and emitting the source's callback.
Fix all that by calling waitpid() right before dispatching the callback.
---
glib/gmain.c | 209 +++++++++++++++++++++++++++-------------------
glib/tests/unix.c | 93 +++++++++++++++++++++
2 files changed, 218 insertions(+), 84 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index aec04314c..d1ab421fb 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -350,12 +350,11 @@ struct _GChildWatchSource
{
GSource source;
GPid pid;
- gint child_status;
/* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */
GPollFD poll;
#ifndef G_OS_WIN32
- gboolean child_exited; /* (atomic); not used iff @using_pidfd is set */
- gboolean using_pidfd;
+ gboolean child_maybe_exited; /* (atomic) */
+ gboolean using_pidfd;
#endif /* G_OS_WIN32 */
};
@@ -5238,7 +5237,7 @@ siginfo_t_to_wait_status (const siginfo_t *info)
return W_STOPCODE (info->si_status);
}
}
-#endif /* HAVE_PIDFD */
+#endif /* HAVE_PIDFD */
static gboolean
g_child_watch_prepare (GSource *source,
@@ -5246,19 +5245,19 @@ g_child_watch_prepare (GSource *source,
{
#ifdef G_OS_WIN32
return FALSE;
-#else /* G_OS_WIN32 */
+#else /* G_OS_WIN32 */
{
GChildWatchSource *child_watch_source;
child_watch_source = (GChildWatchSource *) source;
- return g_atomic_int_get (&child_watch_source->child_exited);
+ return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited);
}
#endif /* G_OS_WIN32 */
}
static gboolean
-g_child_watch_check (GSource *source)
+g_child_watch_check (GSource *source)
{
GChildWatchSource *child_watch_source;
gboolean child_exited;
@@ -5267,57 +5266,15 @@ g_child_watch_check (GSource *source)
#ifdef G_OS_WIN32
child_exited = child_watch_source->poll.revents & G_IO_IN;
-
- if (child_exited)
- {
- DWORD child_status;
-
- /*
- * Note: We do _not_ check for the special value of STILL_ACTIVE
- * since we know that the process has exited and doing so runs into
- * problems if the child process "happens to return STILL_ACTIVE(259)"
- * as Microsoft's Platform SDK puts it.
- */
- if (!GetExitCodeProcess (child_watch_source->pid, &child_status))
- {
- gchar *emsg = g_win32_error_message (GetLastError ());
- g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
- g_free (emsg);
-
- child_watch_source->child_status = -1;
- }
- else
- child_watch_source->child_status = child_status;
- }
#else /* G_OS_WIN32 */
#ifdef HAVE_PIDFD
if (child_watch_source->using_pidfd)
{
child_exited = child_watch_source->poll.revents & G_IO_IN;
-
- if (child_exited)
- {
- siginfo_t child_info = { 0, };
-
- /* Get the exit status */
- if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
- child_info.si_pid != 0)
- {
- /* waitid() helpfully provides the wait status in a decomposed
- * form which is quite useful. Unfortunately we have to report it
- * to the #GChildWatchFunc as a waitpid()-style platform-specific
- * wait status, so that the user code in #GChildWatchFunc can then
- * call WIFEXITED() (etc.) on it. That means re-composing the
- * status information. */
- child_watch_source->child_status = siginfo_t_to_wait_status (&child_info);
- child_watch_source->child_exited = TRUE;
- }
- }
-
return child_exited;
}
-#endif /* HAVE_PIDFD */
- child_exited = g_atomic_int_get (&child_watch_source->child_exited);
+#endif /* HAVE_PIDFD */
+ child_exited = g_atomic_int_get (&child_watch_source->child_maybe_exited);
#endif /* G_OS_WIN32 */
return child_exited;
@@ -5426,30 +5383,9 @@ dispatch_unix_signals_unlocked (void)
for (node = unix_child_watches; node; node = node->next)
{
GChildWatchSource *source = node->data;
- pid_t pid;
-
- if (g_atomic_int_get (&source->child_exited))
- continue;
- do
- {
- g_assert (source->pid > 0);
-
- pid = waitpid (source->pid, &source->child_status, WNOHANG);
- if (pid > 0)
- {
- g_atomic_int_set (&source->child_exited, TRUE);
- wake_source ((GSource *) source);
- }
- else if (pid == -1 && errno == ECHILD)
- {
- g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
- source->child_status = 0;
- g_atomic_int_set (&source->child_exited, TRUE);
- wake_source ((GSource *) source);
- }
- }
- while (pid == -1 && errno == EINTR);
+ if (g_atomic_int_compare_and_exchange (&source->child_maybe_exited, FALSE, TRUE))
+ wake_source ((GSource *) source);
}
}
@@ -5662,9 +5598,106 @@ g_child_watch_dispatch (GSource *source,
{
GChildWatchSource *child_watch_source;
GChildWatchFunc child_watch_callback = (GChildWatchFunc) callback;
+ int wait_status;
child_watch_source = (GChildWatchSource *) source;
+ /* We only (try to) reap the child process right before dispatching the callback.
+ * That way, the caller can rely that the process is there until the callback
+ * is invoked; or, if the caller calls g_source_destroy() without the callback
+ * being dispatched, the process is still not reaped. */
+
+#ifdef G_OS_WIN32
+ {
+ DWORD child_status;
+
+ /*
+ * Note: We do _not_ check for the special value of STILL_ACTIVE
+ * since we know that the process has exited and doing so runs into
+ * problems if the child process "happens to return STILL_ACTIVE(259)"
+ * as Microsoft's Platform SDK puts it.
+ */
+ if (!GetExitCodeProcess (child_watch_source->pid, &child_status))
+ {
+ gchar *emsg = g_win32_error_message (GetLastError ());
+ g_warning (G_STRLOC ": GetExitCodeProcess() failed: %s", emsg);
+ g_free (emsg);
+
+ /* Unknown error. We got signaled that the process might be exited,
+ * but now we failed to reap it? Assume the process is gone and proceed. */
+ wait_status = -1;
+ }
+ else
+ wait_status = child_status;
+ }
+#else /* G_OS_WIN32 */
+ {
+ gboolean child_exited = FALSE;
+
+ wait_status = -1;
+
+#ifdef HAVE_PIDFD
+ if (child_watch_source->using_pidfd)
+ {
+ siginfo_t child_info = {
+ 0,
+ };
+
+ /* Get the exit status */
+ if (waitid (P_PIDFD, child_watch_source->poll.fd, &child_info, WEXITED | WNOHANG) >= 0 &&
+ child_info.si_pid != 0)
+ {
+ /* waitid() helpfully provides the wait status in a decomposed
+ * form which is quite useful. Unfortunately we have to report it
+ * to the #GChildWatchFunc as a waitpid()-style platform-specific
+ * wait status, so that the user code in #GChildWatchFunc can then
+ * call WIFEXITED() (etc.) on it. That means re-composing the
+ * status information. */
+ wait_status = siginfo_t_to_wait_status (&child_info);
+ }
+ else
+ {
+ /* Unknown error. We got signaled that the process might be exited,
+ * but now we failed to reap it? Assume the process is gone and proceed. */
+ g_warning (G_STRLOC ": pidfd signaled ready but failed");
+ }
+ child_exited = TRUE;
+ }
+#endif /* HAVE_PIDFD*/
+
+ if (!child_exited)
+ {
+ pid_t pid;
+ int wstatus;
+
+ waitpid_again:
+
+ /* We must reset the flag before waitpid(). Otherwise, there would be a
+ * race. */
+ g_atomic_int_set (&child_watch_source->child_maybe_exited, FALSE);
+
+ pid = waitpid (child_watch_source->pid, &wstatus, WNOHANG);
+
+ if (pid == 0)
+ {
+ /* Not exited yet. Wait longer. */
+ return TRUE;
+ }
+
+ if (pid > 0)
+ wait_status = wstatus;
+ else if (errno == ECHILD)
+ g_warning ("GChildWatchSource: Exit status of a child process was requested but ECHILD was received by waitpid(). See the documentation of g_child_watch_source_new() for possible causes.");
+ else if (errno == EINTR)
+ goto waitpid_again;
+ else
+ {
+ /* Unexpected error. Whatever happened, we are done waiting for this child. */
+ }
+ }
+ }
+#endif /* G_OS_WIN32 */
+
if (!callback)
{
g_warning ("Child watch source dispatched without callback. "
@@ -5672,7 +5705,7 @@ g_child_watch_dispatch (GSource *source,
return FALSE;
}
- (child_watch_callback) (child_watch_source->pid, child_watch_source->child_status, user_data);
+ (child_watch_callback) (child_watch_source->pid, wait_status, user_data);
/* We never keep a child watch source around as the child is gone */
return FALSE;
@@ -5731,6 +5764,14 @@ g_unix_signal_handler (int signum)
* mechanism, including `waitpid(pid, ...)` or a second child-watch
* source for the same @pid
* * the application must not ignore `SIGCHLD`
+ * * Before 2.78, the application could not send a signal (`kill()`) to the
+ * watched @pid in a race free manner. Since 2.78, you can do that while the
+ * associated #GMainContext is acquired.
+ * * Before 2.78, even after destroying the #GSource, you could not
+ * be sure that @pid wasn't already reaped. Hence, it was also not
+ * safe to `kill()` or `waitpid()` on the process ID after the child watch
+ * source was gone. Destroying the source before it fired made it
+ * impossible to reliably reap the process.
*
* If any of those conditions are not met, this and related APIs will
* not work correctly. This can often be diagnosed via a GLib warning
@@ -5791,19 +5832,19 @@ g_child_watch_source_new (GPid pid)
return source;
}
- else
- {
- g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s",
- pid, g_strerror (errsv));
- /* Fall through; likely the kernel isnt new enough to support pidfd_open() */
- }
-#endif /* HAVE_PIDFD */
+
+ g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s",
+ pid, g_strerror (errsv));
+ /* Fall through; likely the kernel isnt new enough to support pidfd_open() */
+#endif /* HAVE_PIDFD */
+
+ /* We can do that without atomic, as the source is not yet added in
+ * unix_child_watches (which we do next under a lock). */
+ child_watch_source->child_maybe_exited = TRUE;
G_LOCK (unix_signal_lock);
ref_unix_signal_handler_unlocked (SIGCHLD);
unix_child_watches = g_slist_prepend (unix_child_watches, child_watch_source);
- if (waitpid (pid, &child_watch_source->child_status, WNOHANG) > 0)
- child_watch_source->child_exited = TRUE;
G_UNLOCK (unix_signal_lock);
#endif /* !G_OS_WIN32 */
diff --git a/glib/tests/unix.c b/glib/tests/unix.c
index 7639d066a..6f40ff893 100644
--- a/glib/tests/unix.c
+++ b/glib/tests/unix.c
@@ -330,6 +330,99 @@ test_get_passwd_entry_nonexistent (void)
g_clear_error (&local_error);
}
+static void
+_child_wait_watch_cb (GPid pid,
+ gint wait_status,
+ gpointer user_data)
+{
+ gboolean *p_got_callback = user_data;
+
+ g_assert_nonnull (p_got_callback);
+ g_assert_false (*p_got_callback);
+ *p_got_callback = TRUE;
+}
+
+static void
+test_child_wait (void)
+{
+ gboolean r;
+ GPid pid;
+ guint id;
+ pid_t pid2;
+ int wstatus;
+ gboolean got_callback = FALSE;
+ gboolean iterate_maincontext = g_test_rand_bit ();
+ char **argv;
+ int errsv;
+
+ /* - We spawn a trivial child process that exits after a short time.
+ * - We schedule a g_child_watch_add()
+ * - we may iterate the GMainContext a bit. Randomly we either get the
+ * child-watcher callback or not.
+ * - if we didn't get the callback, we g_source_remove() the child watcher.
+ *
+ * Afterwards, if the callback didn't fire, we check that we are able to waitpid()
+ * on the process ourselves. Of course, if the child watcher notified, the waitpid()
+ * will fail with ECHILD.
+ */
+
+ argv = g_test_rand_bit () ? ((char *[]){ "/bin/sleep", "0.05", NULL }) : ((char *[]){ "/bin/true", NULL });
+
+ r = g_spawn_async (NULL,
+ argv,
+ NULL,
+ G_SPAWN_DO_NOT_REAP_CHILD,
+ NULL,
+ NULL,
+ &pid,
+ NULL);
+ if (!r)
+ {
+ /* Some odd system without /bin/sleep? Skip the test. */
+ g_test_skip ("failure to spawn test process in test_child_wait()");
+ return;
+ }
+
+ g_assert_cmpint (pid, >=, 1);
+
+ if (g_test_rand_bit ())
+ g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)));
+
+ id = g_child_watch_add (pid, _child_wait_watch_cb, &got_callback);
+
+ if (g_test_rand_bit ())
+ g_usleep (g_test_rand_int_range (0, (G_USEC_PER_SEC / 10)));
+
+ if (iterate_maincontext)
+ {
+ gint64 start_usec = g_get_monotonic_time ();
+ gint64 end_usec = start_usec + g_test_rand_int_range (0, (G_USEC_PER_SEC / 10));
+
+ while (!got_callback && g_get_monotonic_time () < end_usec)
+ g_main_context_iteration (NULL, FALSE);
+ }
+
+ if (!got_callback)
+ g_source_remove (id);
+
+ errno = 0;
+ pid2 = waitpid (pid, &wstatus, 0);
+ errsv = errno;
+ if (got_callback)
+ {
+ g_assert_true (iterate_maincontext);
+ g_assert_cmpint (errsv, ==, ECHILD);
+ g_assert_cmpint (pid2, <, 0);
+ }
+ else
+ {
+ g_assert_cmpint (errsv, ==, 0);
+ g_assert_cmpint (pid2, ==, pid);
+ g_assert_true (WIFEXITED (wstatus));
+ g_assert_cmpint (WEXITSTATUS (wstatus), ==, 0);
+ }
+}
+
int
main (int argc,
char *argv[])
--
2.41.0
From ad18d2dad0b7105a379e1b2feae653bf30418397 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Tue, 28 Mar 2023 19:53:02 +0200
Subject: [PATCH 6/7] gmain: drop redundant using_pidfd field from
GChildWatchSource
It's redundant, which leads to impossible code like:
if (child_watch_source->using_pidfd)
{
if (child_watch_source->poll.fd >= 0)
close (child_watch_source->poll.fd);
---
glib/gmain.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index d1ab421fb..3813b032a 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -350,11 +350,11 @@ struct _GChildWatchSource
{
GSource source;
GPid pid;
- /* @poll is always used on Windows, and used on Unix iff @using_pidfd is set: */
+ /* @poll is always used on Windows.
+ * On Unix, poll.fd will be negative if PIDFD is unavailable. */
GPollFD poll;
#ifndef G_OS_WIN32
gboolean child_maybe_exited; /* (atomic) */
- gboolean using_pidfd;
#endif /* G_OS_WIN32 */
};
@@ -5251,7 +5251,10 @@ g_child_watch_prepare (GSource *source,
child_watch_source = (GChildWatchSource *) source;
- return !child_watch_source->using_pidfd && g_atomic_int_get (&child_watch_source->child_maybe_exited);
+ if (child_watch_source->poll.fd >= 0)
+ return FALSE;
+
+ return g_atomic_int_get (&child_watch_source->child_maybe_exited);
}
#endif /* G_OS_WIN32 */
}
@@ -5268,7 +5271,7 @@ g_child_watch_check (GSource *source)
child_exited = child_watch_source->poll.revents & G_IO_IN;
#else /* G_OS_WIN32 */
#ifdef HAVE_PIDFD
- if (child_watch_source->using_pidfd)
+ if (child_watch_source->poll.fd >= 0)
{
child_exited = child_watch_source->poll.revents & G_IO_IN;
return child_exited;
@@ -5286,10 +5289,9 @@ g_child_watch_finalize (GSource *source)
#ifndef G_OS_WIN32
GChildWatchSource *child_watch_source = (GChildWatchSource *) source;
- if (child_watch_source->using_pidfd)
+ if (child_watch_source->poll.fd >= 0)
{
- if (child_watch_source->poll.fd >= 0)
- close (child_watch_source->poll.fd);
+ close (child_watch_source->poll.fd);
return;
}
@@ -5637,7 +5639,7 @@ g_child_watch_dispatch (GSource *source,
wait_status = -1;
#ifdef HAVE_PIDFD
- if (child_watch_source->using_pidfd)
+ if (child_watch_source->poll.fd >= 0)
{
siginfo_t child_info = {
0,
@@ -5822,17 +5824,15 @@ g_child_watch_source_new (GPid pid)
* better than SIGCHLD.
*/
child_watch_source->poll.fd = (int) syscall (SYS_pidfd_open, pid, 0);
- errsv = errno;
if (child_watch_source->poll.fd >= 0)
{
- child_watch_source->using_pidfd = TRUE;
child_watch_source->poll.events = G_IO_IN;
g_source_add_poll (source, &child_watch_source->poll);
-
return source;
}
+ errsv = errno;
g_debug ("pidfd_open(%" G_PID_FORMAT ") failed with error: %s",
pid, g_strerror (errsv));
/* Fall through; likely the kernel isnt new enough to support pidfd_open() */
@@ -5841,6 +5841,7 @@ g_child_watch_source_new (GPid pid)
/* We can do that without atomic, as the source is not yet added in
* unix_child_watches (which we do next under a lock). */
child_watch_source->child_maybe_exited = TRUE;
+ child_watch_source->poll.fd = -1;
G_LOCK (unix_signal_lock);
ref_unix_signal_handler_unlocked (SIGCHLD);
--
2.41.0
From bbdcc6e72ac97e577486aeb1baad0060ad8e1cd6 Mon Sep 17 00:00:00 2001
From: Thomas Haller <thaller@redhat.com>
Date: Wed, 29 Mar 2023 07:51:26 +0200
Subject: [PATCH 7/7] gmain: ensure boolean value in g_child_watch_check() is
strictly 0 or 1
No problem in practice, but it seems nice to ensure that a gboolean is
always either FALSE or TRUE.
---
glib/gmain.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/glib/gmain.c b/glib/gmain.c
index 3813b032a..b6ffc6dd0 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -5268,12 +5268,12 @@ g_child_watch_check (GSource *source)
child_watch_source = (GChildWatchSource *) source;
#ifdef G_OS_WIN32
- child_exited = child_watch_source->poll.revents & G_IO_IN;
+ child_exited = !!(child_watch_source->poll.revents & G_IO_IN);
#else /* G_OS_WIN32 */
#ifdef HAVE_PIDFD
if (child_watch_source->poll.fd >= 0)
{
- child_exited = child_watch_source->poll.revents & G_IO_IN;
+ child_exited = !!(child_watch_source->poll.revents & G_IO_IN);
return child_exited;
}
#endif /* HAVE_PIDFD */
--
2.41.0