181 lines
5.9 KiB
Diff
181 lines
5.9 KiB
Diff
|
From ce352accdfb07a91b5527e70ec8bce658a8b68de Mon Sep 17 00:00:00 2001
|
||
|
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||
|
Date: Tue, 23 Feb 2021 18:27:42 -0800
|
||
|
Subject: [PATCH 4/5] Fix cgroup2 mount for rootless case
|
||
|
|
||
|
In the rootless case, cgroup2 mount is not possible (see [1] for more
|
||
|
details), so since commit 9c81440fb5a7 runc bind-mounts the whole
|
||
|
/sys/fs/cgroup into the container.
|
||
|
|
||
|
Problem is, if cgroupns is enabled, /sys/fs/cgroup inside the container
|
||
|
is supposed to show the cgroup files for this cgroup, not the root one.
|
||
|
|
||
|
The fix is to pass through and use the cgroup path in case cgroup2
|
||
|
mount fails, cgroupns is enabled, and the path is non-empty.
|
||
|
|
||
|
Naturally, this requires the /sys/fs/cgroup mount in the spec, so modify
|
||
|
runc spec --rootless to keep it.
|
||
|
|
||
|
Before:
|
||
|
|
||
|
$ ./runc run aaa
|
||
|
# find /sys/fs/cgroup/ -type d
|
||
|
/sys/fs/cgroup
|
||
|
/sys/fs/cgroup/user.slice
|
||
|
/sys/fs/cgroup/user.slice/user-1000.slice
|
||
|
/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service
|
||
|
...
|
||
|
# ls -l /sys/fs/cgroup/cgroup.controllers
|
||
|
-r--r--r-- 1 nobody nogroup 0 Feb 24 02:22 /sys/fs/cgroup/cgroup.controllers
|
||
|
# wc -w /sys/fs/cgroup/cgroup.procs
|
||
|
142 /sys/fs/cgroup/cgroup.procs
|
||
|
# cat /sys/fs/cgroup/memory.current
|
||
|
cat: can't open '/sys/fs/cgroup/memory.current': No such file or directory
|
||
|
|
||
|
After:
|
||
|
|
||
|
# find /sys/fs/cgroup/ -type d
|
||
|
/sys/fs/cgroup/
|
||
|
# ls -l /sys/fs/cgroup/cgroup.controllers
|
||
|
-r--r--r-- 1 root root 0 Feb 24 02:43 /sys/fs/cgroup/cgroup.controllers
|
||
|
# wc -w /sys/fs/cgroup/cgroup.procs
|
||
|
2 /sys/fs/cgroup/cgroup.procs
|
||
|
# cat /sys/fs/cgroup/memory.current
|
||
|
577536
|
||
|
|
||
|
[1] https://github.com/opencontainers/runc/issues/2158
|
||
|
|
||
|
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||
|
---
|
||
|
libcontainer/container_linux.go | 3 +++
|
||
|
libcontainer/init_linux.go | 1 +
|
||
|
libcontainer/rootfs_linux.go | 28 +++++++++++++++++++++-------
|
||
|
libcontainer/specconv/example.go | 18 +++++++++---------
|
||
|
4 files changed, 34 insertions(+), 16 deletions(-)
|
||
|
|
||
|
diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
|
||
|
index b6100aae9d5a..1cbc734172d0 100644
|
||
|
--- a/libcontainer/container_linux.go
|
||
|
+++ b/libcontainer/container_linux.go
|
||
|
@@ -610,6 +610,9 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||
|
if len(process.Rlimits) > 0 {
|
||
|
cfg.Rlimits = process.Rlimits
|
||
|
}
|
||
|
+ if cgroups.IsCgroup2UnifiedMode() {
|
||
|
+ cfg.Cgroup2Path = c.cgroupManager.Path("")
|
||
|
+ }
|
||
|
|
||
|
return cfg
|
||
|
}
|
||
|
diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go
|
||
|
index c57af0eebb8b..681797099f46 100644
|
||
|
--- a/libcontainer/init_linux.go
|
||
|
+++ b/libcontainer/init_linux.go
|
||
|
@@ -70,6 +70,7 @@ type initConfig struct {
|
||
|
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||
|
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||
|
SpecState *specs.State `json:"spec_state,omitempty"`
|
||
|
+ Cgroup2Path string `json:"cgroup2_path,omitempty"`
|
||
|
}
|
||
|
|
||
|
type initer interface {
|
||
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||
|
index 0f0495b93b3e..5d2d74cf924b 100644
|
||
|
--- a/libcontainer/rootfs_linux.go
|
||
|
+++ b/libcontainer/rootfs_linux.go
|
||
|
@@ -31,9 +31,11 @@ import (
|
||
|
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||
|
|
||
|
type mountConfig struct {
|
||
|
- root string
|
||
|
- label string
|
||
|
- cgroupns bool
|
||
|
+ root string
|
||
|
+ label string
|
||
|
+ cgroup2Path string
|
||
|
+ rootlessCgroups bool
|
||
|
+ cgroupns bool
|
||
|
}
|
||
|
|
||
|
// needsSetupDev returns true if /dev needs to be set up.
|
||
|
@@ -56,9 +58,11 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||
|
}
|
||
|
|
||
|
mountConfig := &mountConfig{
|
||
|
- root: config.Rootfs,
|
||
|
- label: config.MountLabel,
|
||
|
- cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
|
||
|
+ root: config.Rootfs,
|
||
|
+ label: config.MountLabel,
|
||
|
+ cgroup2Path: iConfig.Cgroup2Path,
|
||
|
+ rootlessCgroups: iConfig.RootlessCgroups,
|
||
|
+ cgroupns: config.Namespaces.Contains(configs.NEWCGROUP),
|
||
|
}
|
||
|
setupDev := needsSetupDev(config)
|
||
|
for _, m := range config.Mounts {
|
||
|
@@ -307,7 +311,17 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
|
||
|
// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||
|
if err == unix.EPERM || err == unix.EBUSY {
|
||
|
src := fs2.UnifiedMountpoint
|
||
|
- return unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||
|
+ if c.cgroupns && c.cgroup2Path != "" {
|
||
|
+ // Emulate cgroupns by bind-mounting
|
||
|
+ // the container cgroup path rather than
|
||
|
+ // the whole /sys/fs/cgroup.
|
||
|
+ src = c.cgroup2Path
|
||
|
+ }
|
||
|
+ err = unix.Mount(src, dest, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||
|
+ if err == unix.ENOENT && c.rootlessCgroups {
|
||
|
+ err = nil
|
||
|
+ }
|
||
|
+ return err
|
||
|
}
|
||
|
return err
|
||
|
}
|
||
|
diff --git a/libcontainer/specconv/example.go b/libcontainer/specconv/example.go
|
||
|
index 8a201bc78dd9..56bab3bfbfa5 100644
|
||
|
--- a/libcontainer/specconv/example.go
|
||
|
+++ b/libcontainer/specconv/example.go
|
||
|
@@ -2,6 +2,7 @@ package specconv
|
||
|
|
||
|
import (
|
||
|
"os"
|
||
|
+ "path/filepath"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||
|
@@ -200,8 +201,14 @@ func ToRootless(spec *specs.Spec) {
|
||
|
// Fix up mounts.
|
||
|
var mounts []specs.Mount
|
||
|
for _, mount := range spec.Mounts {
|
||
|
- // Ignore all mounts that are under /sys.
|
||
|
- if strings.HasPrefix(mount.Destination, "/sys") {
|
||
|
+ // Replace the /sys mount with an rbind.
|
||
|
+ if filepath.Clean(mount.Destination) == "/sys" {
|
||
|
+ mounts = append(mounts, specs.Mount{
|
||
|
+ Source: "/sys",
|
||
|
+ Destination: "/sys",
|
||
|
+ Type: "none",
|
||
|
+ Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||
|
+ })
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
@@ -216,13 +223,6 @@ func ToRootless(spec *specs.Spec) {
|
||
|
mount.Options = options
|
||
|
mounts = append(mounts, mount)
|
||
|
}
|
||
|
- // Add the sysfs mount as an rbind.
|
||
|
- mounts = append(mounts, specs.Mount{
|
||
|
- Source: "/sys",
|
||
|
- Destination: "/sys",
|
||
|
- Type: "none",
|
||
|
- Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||
|
- })
|
||
|
spec.Mounts = mounts
|
||
|
|
||
|
// Remove cgroup settings.
|
||
|
--
|
||
|
2.31.1
|
||
|
|