systemd/0644-user-runtime-dir-enforce-tmp-and-dev-shm-quota.patch
Jan Macku b7ebf97389 systemd-257-24
Resolves: RHEL-155454, RHEL-155805, RHEL-155396, RHEL-158303, RHEL-158354, RHEL-143728, RHEL-168098, RHEL-143028
2026-04-16 15:01:05 +02:00

305 lines
13 KiB
Diff

From 4aad3334b4618edc47d4813bdbed9b3ca6b86aec Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Fri, 10 Jan 2025 11:34:18 +0100
Subject: [PATCH] user-runtime-dir: enforce /tmp/ and /dev/shm/ quota
Enforce the quota on these two tmpfs at the same place where we mount
the per-user $XDG_RUNTIME_DIR. Conceptually these are very similar
concepts, and it makes sure to enforce the limits at the same place with
the same lifecycle.
(cherry picked from commit b1c95fb2e9d11fc190017dec3d64f468f9d378bc)
Resolves: RHEL-143028
---
README | 2 +
man/user@.service.xml | 13 +--
src/login/user-runtime-dir.c | 191 ++++++++++++++++++++++++++++++-----
3 files changed, 173 insertions(+), 33 deletions(-)
diff --git a/README b/README
index b9a58389ad..7d38f17b5a 100644
--- a/README
+++ b/README
@@ -41,6 +41,8 @@ REQUIREMENTS:
≥ 5.3 for bounded loops in BPF program
≥ 5.4 for pidfd and signed Verity images
≥ 5.7 for CLONE_INTO_CGROUP, BPF links and the BPF LSM hook
+ ≥ 5.14 for quotactl_fd()
+ ≥ 6.6 for quota support on tmpfs
⛔ Kernel versions below 3.15 ("minimum baseline") are not supported at
all, and are missing required functionality (e.g. CLOCK_BOOTTIME
diff --git a/man/user@.service.xml b/man/user@.service.xml
index e9cbda4833..f2e83c1b16 100644
--- a/man/user@.service.xml
+++ b/man/user@.service.xml
@@ -42,12 +42,13 @@
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for a
list of units that form the basis of the unit hierarchies of system and user units.</para>
- <para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the
- system unit <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which
- creates the user's runtime directory
- <filename>/run/user/<replaceable>UID</replaceable></filename>, and then removes it when this
- unit is stopped. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>
- executes the <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
+ <para><filename>user@<replaceable>UID</replaceable>.service</filename> is accompanied by the system unit
+ <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename>, which creates the user's
+ runtime directory <filename>/run/user/<replaceable>UID</replaceable></filename> when started, and removes
+ it when it is stopped. It also might apply runtime quota settings on <filename>/tmp/</filename> and/or
+ <filename>/dev/shm/</filename> for the
+ user. <filename>user-runtime-dir@<replaceable>UID</replaceable>.service</filename> executes the
+ <filename>systemd-user-runtime-dir</filename> binary to do the actual work.</para>
<para>User processes may be started by the <filename>user@.service</filename> instance, in which
case they will be part of that unit in the system hierarchy. They may also be started elsewhere,
diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c
index f39c1ad225..94117c95db 100644
--- a/src/login/user-runtime-dir.c
+++ b/src/login/user-runtime-dir.c
@@ -8,15 +8,20 @@
#include "bus-error.h"
#include "bus-locator.h"
#include "dev-setup.h"
+#include "devnum-util.h"
+#include "fd-util.h"
#include "format-util.h"
#include "fs-util.h"
#include "label-util.h"
#include "limits-util.h"
#include "main-func.h"
+#include "missing_magic.h"
+#include "missing_syscall.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "path-util.h"
+#include "quota-util.h"
#include "rm-rf.h"
#include "selinux-util.h"
#include "smack-util.h"
@@ -24,6 +29,7 @@
#include "string-util.h"
#include "strv.h"
#include "user-util.h"
+#include "userdb.h"
static int acquire_runtime_dir_properties(uint64_t *ret_size, uint64_t *ret_inodes) {
_cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
@@ -126,6 +132,26 @@ static int user_mkdir_runtime_path(
return 0;
}
+/* Sets up the per-user runtime directory /run/user/<UID> for the given user record. Refuses records
+ * without a valid UID or GID; otherwise queries the size/inode settings and hands off to
+ * user_mkdir_runtime_path(), whose result is returned. */
+static int do_mount(UserRecord *ur) {
+        char path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
+        uint64_t size, inodes;
+        int q;
+
+        assert(ur);
+
+        if (!(uid_is_valid(ur->uid) && gid_is_valid(ur->gid)))
+                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID or GID, refusing.", ur->user_name);
+
+        q = acquire_runtime_dir_properties(&size, &inodes);
+        if (q < 0)
+                return q;
+
+        xsprintf(path, "/run/user/" UID_FMT, ur->uid);
+        log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, path, ur->uid, ur->gid);
+
+        return user_mkdir_runtime_path(path, ur->uid, ur->gid, size, inodes);
+}
+
static int user_remove_runtime_path(const char *runtime_path) {
int r;
@@ -149,31 +175,6 @@ static int user_remove_runtime_path(const char *runtime_path) {
return 0;
}
-static int do_mount(const char *user) {
- char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
- uint64_t runtime_dir_size, runtime_dir_inodes;
- uid_t uid;
- gid_t gid;
- int r;
-
- r = get_user_creds(&user, &uid, &gid, NULL, NULL, 0);
- if (r < 0)
- return log_error_errno(r,
- r == -ESRCH ? "No such user \"%s\"" :
- r == -ENOMSG ? "UID \"%s\" is invalid or has an invalid main group"
- : "Failed to look up user \"%s\": %m",
- user);
-
- r = acquire_runtime_dir_properties(&runtime_dir_size, &runtime_dir_inodes);
- if (r < 0)
- return r;
-
- xsprintf(runtime_path, "/run/user/" UID_FMT, uid);
-
- log_debug("Will mount %s owned by "UID_FMT":"GID_FMT, runtime_path, uid, gid);
- return user_mkdir_runtime_path(runtime_path, uid, gid, runtime_dir_size, runtime_dir_inodes);
-}
-
static int do_umount(const char *user) {
char runtime_path[STRLEN("/run/user/") + DECIMAL_STR_MAX(uid_t)];
uid_t uid;
@@ -197,6 +198,126 @@ static int do_umount(const char *user) {
return user_remove_runtime_path(runtime_path);
}
+/* Applies a per-UID block quota to each tmpfs backing one of the listed paths.
+ *
+ * paths: directories to process; paths that resolve to an already-handled device are skipped.
+ * uid:   the user to set the quota for (must be valid).
+ * limit: absolute upper bound for the quota, in bytes.
+ * scale: relative bound as a fraction of the file system size, where 0 yields a zero quota and
+ *        UINT32_MAX (i.e. 100%) imposes no relative bound.
+ *
+ * The effective quota is the smaller of the absolute and the relative bound. Failures on individual
+ * paths are logged and skipped; only a failure to allocate the bookkeeping set aborts the loop and
+ * is returned via log_oom(). Otherwise returns 0. */
+static int apply_tmpfs_quota(
+                char **paths,
+                uid_t uid,
+                uint64_t limit,
+                uint32_t scale) {
+
+        _cleanup_set_free_ Set *processed = NULL;
+        int r;
+
+        assert(uid_is_valid(uid));
+
+        STRV_FOREACH(p, paths) {
+                _cleanup_close_ int fd = open(*p, O_DIRECTORY|O_CLOEXEC);
+                if (fd < 0) {
+                        log_warning_errno(errno, "Failed to open '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                struct stat st;
+                if (fstat(fd, &st) < 0) {
+                        log_warning_errno(errno, "Failed to stat '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                /* Cover for bind mounted or symlinked /var/tmp/ + /tmp/ */
+                if (set_contains(processed, DEVNUM_TO_PTR(st.st_dev))) {
+                        log_debug("Not setting quota on '%s', since already processed.", *p);
+                        continue;
+                }
+
+                /* Remember we already dealt with this fs, even if the subsequent operation fails, since
+                 * there's no point in applying quota twice, regardless if it succeeds or not. */
+                if (set_ensure_put(&processed, /* hash_ops= */ NULL, DEVNUM_TO_PTR(st.st_dev)) < 0)
+                        return log_oom();
+
+                struct statfs sfs;
+                if (fstatfs(fd, &sfs) < 0) {
+                        log_warning_errno(errno, "Failed to statfs '%s' in order to set quota, ignoring: %m", *p);
+                        continue;
+                }
+
+                if (!is_fs_type(&sfs, TMPFS_MAGIC)) {
+                        log_debug("Not setting quota on '%s', since not tmpfs.", *p);
+                        continue;
+                }
+
+                struct dqblk req;
+                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_GETQUOTA, USRQUOTA), uid, &req));
+                if (r == -ESRCH)
+                        zero(req);
+                else if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) {
+                        log_debug_errno(r, "No UID quota support on %s, not setting quota: %m", *p);
+                        continue;
+                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                        log_debug_errno(r, "Lacking privileges to query UID quota on %s, not setting quota: %m", *p);
+                        continue;
+                } else if (r < 0) {
+                        log_warning_errno(r, "Failed to query disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
+                        continue;
+                }
+
+                /* Total size of the file system in bytes; widen before multiplying so the product is
+                 * always computed in 64 bit. */
+                uint64_t fs_size = (uint64_t) sfs.f_blocks * (uint64_t) sfs.f_frsize;
+
+                /* The scale is a fraction normalized to UINT32_MAX, hence multiply by it and divide by
+                 * UINT32_MAX (not the other way round, which would yield a limit larger than the file
+                 * system and might overflow the conversion back to uint64_t). */
+                uint64_t v =
+                        (scale == 0) ? 0 :
+                        (scale == UINT32_MAX) ? UINT64_MAX :
+                        (uint64_t) ((double) fs_size * scale / UINT32_MAX);
+
+                v = MIN(v, limit);
+                v /= QIF_DQBLKSIZE;
+
+                if (FLAGS_SET(req.dqb_valid, QIF_BLIMITS) && v == req.dqb_bhardlimit) {
+                        /* Shortcut things if everything is set up properly already */
+                        log_debug("Configured quota on '%s' already matches the intended setting, not updating quota.", *p);
+                        continue;
+                }
+
+                req.dqb_valid = QIF_BLIMITS;
+                req.dqb_bsoftlimit = req.dqb_bhardlimit = v;
+
+                r = RET_NERRNO(quotactl_fd(fd, QCMD_FIXED(Q_SETQUOTA, USRQUOTA), uid, &req));
+                if (r == -ESRCH) {
+                        log_debug_errno(r, "Not setting UID quota on %s since UID quota is not supported: %m", *p);
+                        continue;
+                } else if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+                        log_debug_errno(r, "Lacking privileges to set UID quota on %s, skipping: %m", *p);
+                        continue;
+                } else if (r < 0) {
+                        log_warning_errno(r, "Failed to set disk quota on %s for UID " UID_FMT ", ignoring: %m", *p, uid);
+                        continue;
+                }
+
+                log_info("Successfully configured disk quota for UID " UID_FMT " on %s to %s", uid, *p, FORMAT_BYTES(v * QIF_DQBLKSIZE));
+        }
+
+        return 0;
+}
+
+/* Applies the quota limits stored in the user record to /tmp + /var/tmp and to /dev/shm. The root
+ * user is exempt, and records without a valid UID are refused. Returns 0 when skipped or done,
+ * otherwise propagates the negative result of the failing step. */
+static int do_tmpfs_quota(UserRecord *ur) {
+        assert(ur);
+
+        if (user_record_is_root(ur)) {
+                log_debug("Not applying tmpfs quota to root user.");
+                return 0;
+        }
+
+        if (!uid_is_valid(ur->uid))
+                return log_error_errno(SYNTHETIC_ERRNO(ENOMSG), "User '%s' lacks UID, refusing.", ur->user_name);
+
+        /* First the temporary file directories, which share one limit setting... */
+        int q = apply_tmpfs_quota(
+                        STRV_MAKE("/tmp", "/var/tmp"),
+                        ur->uid,
+                        ur->tmp_limit.limit,
+                        user_record_tmp_limit_scale(ur));
+        if (q < 0)
+                return q;
+
+        /* ...then shared memory, which has its own. */
+        q = apply_tmpfs_quota(
+                        STRV_MAKE("/dev/shm"),
+                        ur->uid,
+                        ur->dev_shm_limit.limit,
+                        user_record_dev_shm_limit_scale(ur));
+
+        return q < 0 ? q : 0;
+}
+
static int run(int argc, char *argv[]) {
int r;
@@ -218,10 +339,26 @@ static int run(int argc, char *argv[]) {
if (r < 0)
return r;
- if (streq(verb, "start"))
- return do_mount(user);
+ if (streq(verb, "start")) {
+ _cleanup_(user_record_unrefp) UserRecord *ur = NULL;
+ r = userdb_by_name(user, /* match= */ NULL, USERDB_PARSE_NUMERIC|USERDB_SUPPRESS_SHADOW, &ur);
+ if (r == -ESRCH)
+ return log_error_errno(r, "User '%s' does not exist: %m", user);
+ if (r < 0)
+ return log_error_errno(r, "Failed to resolve user '%s': %m", user);
+
+ /* We do two things here: mount the per-user XDG_RUNTIME_DIR, and set up tmpfs quota on /tmp/
+ * and /dev/shm/. */
+
+ r = 0;
+ RET_GATHER(r, do_mount(ur));
+ RET_GATHER(r, do_tmpfs_quota(ur));
+ return r;
+ }
+
if (streq(verb, "stop"))
return do_umount(user);
+
assert_not_reached();
}