246 lines
10 KiB
Diff
246 lines
10 KiB
Diff
From 27faf1af778849841d7c3140bd3d92aceaea2ee3 Mon Sep 17 00:00:00 2001
|
|
From: Luca Boccassi <luca.boccassi@gmail.com>
|
|
Date: Sun, 13 Apr 2025 22:10:36 +0100
|
|
Subject: [PATCH] coredump: add support for new %F PIDFD specifier
|
|
|
|
A new core_pattern specifier was added, %F, to provide a PIDFD
|
|
to the usermode helper process referring to the crashed process.
|
|
This removes all possible race conditions, ensuring only the
|
|
crashed process gets inspected by systemd-coredump.
|
|
|
|
(cherry picked from commit 868d95577ec9f862580ad365726515459be582fc)
|
|
|
|
Resolves: RHEL-104138
|
|
---
|
|
man/systemd-coredump.xml | 9 ++++
|
|
src/coredump/coredump.c | 89 ++++++++++++++++++++++++++++++++----
|
|
sysctl.d/50-coredump.conf.in | 2 +-
|
|
3 files changed, 89 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/man/systemd-coredump.xml b/man/systemd-coredump.xml
|
|
index 6cfa04f466..b3d81d838a 100644
|
|
--- a/man/systemd-coredump.xml
|
|
+++ b/man/systemd-coredump.xml
|
|
@@ -186,6 +186,15 @@ COREDUMP_FILENAME=/var/lib/systemd/coredump/core.Web….552351.….zst
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
+ <varlistentry>
|
|
+ <term><varname>COREDUMP_BY_PIDFD=</varname></term>
|
|
+ <listitem><para>If the crashed process was analyzed using a PIDFD provided by the kernel (requires
|
|
+ kernel v6.16) then this field will be present and set to <literal>1</literal>. If this field is
|
|
+ not set, then the crashed process was analyzed via a PID, which is known to be subject to race
|
|
+ conditions.</para>
|
|
+ </listitem>
|
|
+ </varlistentry>
|
|
+
|
|
<varlistentry>
|
|
<term><varname>COREDUMP_TIMESTAMP=</varname></term>
|
|
<listitem><para>The time of the crash as reported by the kernel (in µs since the epoch).</para>
|
|
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
|
|
index cd10678c43..e0aac3c8d0 100644
|
|
--- a/src/coredump/coredump.c
|
|
+++ b/src/coredump/coredump.c
|
|
@@ -39,6 +39,7 @@
|
|
#include "mkdir-label.h"
|
|
#include "namespace-util.h"
|
|
#include "parse-util.h"
|
|
+#include "pidref.h"
|
|
#include "process-util.h"
|
|
#include "signal-util.h"
|
|
#include "socket-util.h"
|
|
@@ -98,8 +99,8 @@ enum {
|
|
/* The fields below were added to kernel/core_pattern at later points, so they might be missing. */
|
|
META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */
|
|
META_ARGV_DUMPABLE, /* %d: as set by the kernel */
|
|
+ META_ARGV_PIDFD, /* %F: pidfd of the process, since v6.16 */
|
|
_META_ARGV_MAX,
|
|
-
|
|
/* If new fields are added, they should be added here, to maintain compatibility
|
|
* with callers which don't know about the new fields. */
|
|
|
|
@@ -129,6 +130,7 @@ static const char * const meta_field_names[_META_MAX] = {
|
|
[META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=",
|
|
[META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=",
|
|
[META_ARGV_DUMPABLE] = "COREDUMP_DUMPABLE=",
|
|
+ [META_ARGV_PIDFD] = "COREDUMP_BY_PIDFD=",
|
|
[META_COMM] = "COREDUMP_COMM=",
|
|
[META_EXE] = "COREDUMP_EXE=",
|
|
[META_UNIT] = "COREDUMP_UNIT=",
|
|
@@ -136,6 +138,7 @@ static const char * const meta_field_names[_META_MAX] = {
|
|
};
|
|
|
|
typedef struct Context {
|
|
+ PidRef pidref;
|
|
const char *meta[_META_MAX];
|
|
size_t meta_size[_META_MAX];
|
|
pid_t pid;
|
|
@@ -146,6 +149,14 @@ typedef struct Context {
|
|
bool is_journald;
|
|
} Context;
|
|
|
|
+#define CONTEXT_NULL \
|
|
+ (Context) { \
|
|
+ .pidref = PIDREF_NULL, \
|
|
+ .uid = UID_INVALID, \
|
|
+ .gid = GID_INVALID, \
|
|
+ }
|
|
+
|
|
+
|
|
typedef enum CoredumpStorage {
|
|
COREDUMP_STORAGE_NONE,
|
|
COREDUMP_STORAGE_EXTERNAL,
|
|
@@ -171,6 +182,12 @@ static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
|
|
static uint64_t arg_keep_free = UINT64_MAX;
|
|
static uint64_t arg_max_use = UINT64_MAX;
|
|
|
|
+static void context_done(Context *c) {
|
|
+ assert(c);
|
|
+
|
|
+ pidref_done(&c->pidref);
|
|
+}
|
|
+
|
|
static int parse_config(void) {
|
|
static const ConfigTableItem items[] = {
|
|
{ "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
|
|
@@ -1114,7 +1131,7 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) {
|
|
|
|
static int process_socket(int fd) {
|
|
_cleanup_close_ int input_fd = -EBADF, mntns_fd = -EBADF;
|
|
- Context context = {};
|
|
+ _cleanup_(context_done) Context context = CONTEXT_NULL;
|
|
struct iovec_wrapper iovw = {};
|
|
struct iovec iovec;
|
|
int iterations = 0, r;
|
|
@@ -1215,7 +1232,7 @@ static int process_socket(int fd) {
|
|
goto finish;
|
|
|
|
/* Make sure we received at least all fields we need. */
|
|
- for (int i = 0; i < _META_MANDATORY_MAX; i++)
|
|
+ for (int i = 0; i < _META_ARGV_REQUIRED; i++)
|
|
if (!context.meta[i]) {
|
|
r = log_error_errno(SYNTHETIC_ERRNO(EINVAL),
|
|
"A mandatory argument (%i) has not been sent, aborting.",
|
|
@@ -1301,9 +1318,9 @@ static int gather_pid_metadata_from_argv(
|
|
Context *context,
|
|
int argc, char **argv) {
|
|
|
|
+ _cleanup_(pidref_done) PidRef local_pidref = PIDREF_NULL;
|
|
_cleanup_free_ char *free_timestamp = NULL;
|
|
- int r, signo;
|
|
- char *t;
|
|
+ int r, signo, kernel_fd = -EBADF;
|
|
|
|
/* We gather all metadata that were passed via argv[] into an array of iovecs that
|
|
* we'll forward to the socket unit.
|
|
@@ -1317,8 +1334,7 @@ static int gather_pid_metadata_from_argv(
|
|
argc, _META_ARGV_REQUIRED, _META_ARGV_MAX);
|
|
|
|
for (int i = 0; i < MIN(argc, _META_ARGV_MAX); i++) {
|
|
-
|
|
- t = argv[i];
|
|
+ const char *t = argv[i];
|
|
|
|
switch (i) {
|
|
|
|
@@ -1343,6 +1359,47 @@ static int gather_pid_metadata_from_argv(
|
|
break;
|
|
}
|
|
|
|
+ if (i == META_ARGV_PID) {
|
|
+ /* Store this so that we can check whether the core will be forwarded to a container
|
|
+ * even when the kernel doesn't provide a pidfd. Can be dropped once baseline is
|
|
+ * >= v6.16. */
|
|
+ r = pidref_set_pidstr(&local_pidref, t);
|
|
+ if (r < 0)
|
|
+ return log_error_errno(r, "Failed to initialize pidref from pid %s: %m", t);
|
|
+ }
|
|
+
|
|
+ if (i == META_ARGV_PIDFD) {
|
|
+ /* If the current kernel doesn't support the %F specifier (which resolves to a
|
|
+ * pidfd), but we included it in the core_pattern expression, we'll receive an empty
|
|
+ * string here. Deal with that gracefully. */
|
|
+ if (isempty(t))
|
|
+ continue;
|
|
+
|
|
+ assert(!pidref_is_set(&context->pidref));
|
|
+ assert(kernel_fd < 0);
|
|
+
|
|
+ kernel_fd = parse_fd(t);
|
|
+ if (kernel_fd < 0)
|
|
+ return log_error_errno(kernel_fd, "Failed to parse pidfd \"%s\": %m", t);
|
|
+
|
|
+ r = pidref_set_pidfd(&context->pidref, kernel_fd);
|
|
+ if (r < 0)
|
|
+ return log_error_errno(r, "Failed to initialize pidref from pidfd %d: %m", kernel_fd);
|
|
+
|
|
+ /* If there are containers involved with different versions of the code they might
|
|
+ * not be using pidfds, so it would be wrong to set the metadata, skip it. */
|
|
+ r = in_same_namespace(getpid_cached(), context->pidref.pid, NAMESPACE_PID);
|
|
+ if (r < 0)
|
|
+ log_debug_errno(r, "Failed to check pidns of crashing process, ignoring: %m");
|
|
+ if (r <= 0)
|
|
+ continue;
|
|
+
|
|
+ /* We don't print the fd number in the journal as it's meaningless, but we still
|
|
+ * record that the parsing was done with a kernel-provided fd as it means it's safe
|
|
+ * from races, which is valuable information to provide in the journal record. */
|
|
+ t = "1";
|
|
+ }
|
|
+
|
|
r = iovw_put_string_field(iovw, meta_field_names[i], t);
|
|
if (r < 0)
|
|
return r;
|
|
@@ -1350,7 +1407,19 @@ static int gather_pid_metadata_from_argv(
|
|
|
|
/* Cache some of the process metadata we collected so far and that we'll need to
|
|
* access soon */
|
|
- return save_context(context, iovw);
|
|
+ r = save_context(context, iovw);
|
|
+ if (r < 0)
|
|
+ return r;
|
|
+
|
|
+ /* If the kernel didn't give us a PIDFD, then use the one derived from the
|
|
+ * PID immediately, given we have it. */
|
|
+ if (!pidref_is_set(&context->pidref))
|
|
+ context->pidref = TAKE_PIDREF(local_pidref);
|
|
+
|
|
+ /* Close the kernel-provided FD as the last thing after everything else succeeded. */
|
|
+ kernel_fd = safe_close(kernel_fd);
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
|
|
@@ -1466,7 +1535,7 @@ static int gather_pid_metadata(struct iovec_wrapper *iovw, Context *context) {
|
|
}
|
|
|
|
static int process_kernel(int argc, char* argv[]) {
|
|
- Context context = {};
|
|
+ _cleanup_(context_done) Context context = CONTEXT_NULL;
|
|
struct iovec_wrapper *iovw;
|
|
int r, mntns_fd = -EBADF;
|
|
|
|
@@ -1543,7 +1612,7 @@ static int process_kernel(int argc, char* argv[]) {
|
|
}
|
|
|
|
static int process_backtrace(int argc, char *argv[]) {
|
|
- Context context = {};
|
|
+ _cleanup_(context_done) Context context = CONTEXT_NULL;
|
|
struct iovec_wrapper *iovw;
|
|
char *message;
|
|
int r;
|
|
diff --git a/sysctl.d/50-coredump.conf.in b/sysctl.d/50-coredump.conf.in
|
|
index 9c10a89828..1c6230ad93 100644
|
|
--- a/sysctl.d/50-coredump.conf.in
|
|
+++ b/sysctl.d/50-coredump.conf.in
|
|
@@ -13,7 +13,7 @@
|
|
# the core dump.
|
|
#
|
|
# See systemd-coredump(8) and core(5).
|
|
-kernel.core_pattern=|{{ROOTLIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d
|
|
+kernel.core_pattern=|{{ROOTLIBEXECDIR}}/systemd-coredump %P %u %g %s %t %c %h %d %F
|
|
|
|
# Allow 16 coredumps to be dispatched in parallel by the kernel.
|
|
# We collect metadata from /proc/%P/, and thus need to make sure the crashed
|