Compare commits
No commits in common. "c8-beta-stream-2.0" and "c8-stream-1.0" have entirely different histories.
c8-beta-st
...
c8-stream-
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
SOURCES/runc-dc9208a.tar.gz
|
SOURCES/runc-2abd837.tar.gz
|
||||||
|
@ -1 +1 @@
|
|||||||
32859590dea35b77eed012c388d97fc12fdfdb93 SOURCES/runc-dc9208a.tar.gz
|
cf7119a838db2963e7af6ecdba90a2cc95ec0d56 SOURCES/runc-2abd837.tar.gz
|
||||||
|
62
SOURCES/0001-Revert-Apply-cgroups-earlier.patch
Normal file
62
SOURCES/0001-Revert-Apply-cgroups-earlier.patch
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
From dfb3496c174377b860b62872ce6af951364cc3ac Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lokesh Mandvekar <lsm5@fedoraproject.org>
|
||||||
|
Date: Tue, 12 Dec 2017 13:22:42 +0530
|
||||||
|
Subject: [PATCH] Revert "Apply cgroups earlier"
|
||||||
|
|
||||||
|
This reverts commit 7062c7556b71188abc18d7516441ff4b03fbc1fc.
|
||||||
|
---
|
||||||
|
libcontainer/process_linux.go | 31 ++++++++++++++-----------------
|
||||||
|
1 file changed, 14 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
|
||||||
|
index 149b1126..b8a395af 100644
|
||||||
|
--- a/libcontainer/process_linux.go
|
||||||
|
+++ b/libcontainer/process_linux.go
|
||||||
|
@@ -272,6 +272,20 @@ func (p *initProcess) start() error {
|
||||||
|
p.process.ops = nil
|
||||||
|
return newSystemErrorWithCause(err, "starting init process command")
|
||||||
|
}
|
||||||
|
+ if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||||
|
+ return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||||
|
+ }
|
||||||
|
+ if err := p.execSetns(); err != nil {
|
||||||
|
+ return newSystemErrorWithCause(err, "running exec setns process for init")
|
||||||
|
+ }
|
||||||
|
+ // Save the standard descriptor names before the container process
|
||||||
|
+ // can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||||
|
+ // we won't know at checkpoint time which file descriptor to look up.
|
||||||
|
+ fds, err := getPipeFds(p.pid())
|
||||||
|
+ if err != nil {
|
||||||
|
+ return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||||
|
+ }
|
||||||
|
+ p.setExternalDescriptors(fds)
|
||||||
|
// Do this before syncing with child so that no children can escape the
|
||||||
|
// cgroup. We don't need to worry about not doing this and not being root
|
||||||
|
// because we'd be using the rootless cgroup manager in that case.
|
||||||
|
@@ -292,23 +306,6 @@ func (p *initProcess) start() error {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
-
|
||||||
|
- if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||||
|
- return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if err := p.execSetns(); err != nil {
|
||||||
|
- return newSystemErrorWithCause(err, "running exec setns process for init")
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- // Save the standard descriptor names before the container process
|
||||||
|
- // can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||||
|
- // we won't know at checkpoint time which file descriptor to look up.
|
||||||
|
- fds, err := getPipeFds(p.pid())
|
||||||
|
- if err != nil {
|
||||||
|
- return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||||
|
- }
|
||||||
|
- p.setExternalDescriptors(fds)
|
||||||
|
if err := p.createNetworkInterfaces(); err != nil {
|
||||||
|
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.14.3
|
||||||
|
|
@ -0,0 +1,290 @@
|
|||||||
|
From bf6405284aa3870a39b402309003633a1c230ed9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Aleksa Sarai <asarai@suse.de>
|
||||||
|
Date: Wed, 9 Jan 2019 13:40:01 +1100
|
||||||
|
Subject: [PATCH 1/1] nsenter: clone /proc/self/exe to avoid exposing host
|
||||||
|
binary to container
|
||||||
|
|
||||||
|
There are quite a few circumstances where /proc/self/exe pointing to a
|
||||||
|
pretty important container binary is a _bad_ thing, so to avoid this we
|
||||||
|
have to make a copy (preferably doing self-clean-up and not being
|
||||||
|
writeable).
|
||||||
|
|
||||||
|
As a hotfix we require memfd_create(2), but we can always extend this to
|
||||||
|
use a scratch MNT_DETACH overlayfs or tmpfs. The main downside to this
|
||||||
|
approach is no page-cache sharing for the runc binary (which overlayfs
|
||||||
|
would give us) but this is far less complicated.
|
||||||
|
|
||||||
|
This is only done during nsenter so that it happens transparently to the
|
||||||
|
Go code, and any libcontainer users benefit from it. This also makes
|
||||||
|
ExtraFiles and --preserve-fds handling trivial (because we don't need to
|
||||||
|
worry about it).
|
||||||
|
|
||||||
|
Fixes: CVE-2019-5736
|
||||||
|
Co-developed-by: Christian Brauner <christian.brauner@ubuntu.com>
|
||||||
|
Signed-off-by: Aleksa Sarai <asarai@suse.de>
|
||||||
|
Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
|
||||||
|
---
|
||||||
|
libcontainer/nsenter/cloned_binary.c | 221 +++++++++++++++++++++++++++
|
||||||
|
libcontainer/nsenter/nsexec.c | 11 ++
|
||||||
|
2 files changed, 232 insertions(+)
|
||||||
|
create mode 100644 libcontainer/nsenter/cloned_binary.c
|
||||||
|
|
||||||
|
diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000..d9f6093a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libcontainer/nsenter/cloned_binary.c
|
||||||
|
@@ -0,0 +1,221 @@
|
||||||
|
+#define _GNU_SOURCE
|
||||||
|
+#include <unistd.h>
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+#include <stdbool.h>
|
||||||
|
+#include <string.h>
|
||||||
|
+#include <limits.h>
|
||||||
|
+#include <fcntl.h>
|
||||||
|
+#include <errno.h>
|
||||||
|
+
|
||||||
|
+#include <sys/types.h>
|
||||||
|
+#include <sys/stat.h>
|
||||||
|
+#include <sys/vfs.h>
|
||||||
|
+#include <sys/mman.h>
|
||||||
|
+#include <sys/sendfile.h>
|
||||||
|
+#include <sys/syscall.h>
|
||||||
|
+
|
||||||
|
+#include <linux/magic.h>
|
||||||
|
+#include <linux/memfd.h>
|
||||||
|
+
|
||||||
|
+/* Use our own wrapper for memfd_create. */
|
||||||
|
+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
|
||||||
|
+# define SYS_memfd_create __NR_memfd_create
|
||||||
|
+#endif
|
||||||
|
+#ifndef SYS_memfd_create
|
||||||
|
+# error "memfd_create(2) syscall not supported by this glibc version"
|
||||||
|
+#endif
|
||||||
|
+int memfd_create(const char *name, unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ return syscall(SYS_memfd_create, name, flags);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* This comes directly from <linux/fcntl.h>. */
|
||||||
|
+#ifndef F_LINUX_SPECIFIC_BASE
|
||||||
|
+# define F_LINUX_SPECIFIC_BASE 1024
|
||||||
|
+#endif
|
||||||
|
+#ifndef F_ADD_SEALS
|
||||||
|
+# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
||||||
|
+# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
||||||
|
+#endif
|
||||||
|
+#ifndef F_SEAL_SEAL
|
||||||
|
+# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||||
|
+# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||||
|
+# define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||||
|
+# define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+#define OUR_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
|
||||||
|
+#define OUR_MEMFD_SEALS \
|
||||||
|
+ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
|
||||||
|
+
|
||||||
|
+static void *must_realloc(void *ptr, size_t size)
|
||||||
|
+{
|
||||||
|
+ void *old = ptr;
|
||||||
|
+ do {
|
||||||
|
+ ptr = realloc(old, size);
|
||||||
|
+ } while(!ptr);
|
||||||
|
+ return ptr;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Verify whether we are currently in a self-cloned program (namely, is
|
||||||
|
+ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
|
||||||
|
+ * for shmem files), and we want to be sure it's actually sealed.
|
||||||
|
+ */
|
||||||
|
+static int is_self_cloned(void)
|
||||||
|
+{
|
||||||
|
+ int fd, seals;
|
||||||
|
+
|
||||||
|
+ fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
||||||
|
+ if (fd < 0)
|
||||||
|
+ return -ENOTRECOVERABLE;
|
||||||
|
+
|
||||||
|
+ seals = fcntl(fd, F_GET_SEALS);
|
||||||
|
+ close(fd);
|
||||||
|
+ return seals == OUR_MEMFD_SEALS;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Basic wrapper around mmap(2) that gives you the file length so you can
|
||||||
|
+ * safely treat it as an ordinary buffer. Only gives you read access.
|
||||||
|
+ */
|
||||||
|
+static char *read_file(char *path, size_t *length)
|
||||||
|
+{
|
||||||
|
+ int fd;
|
||||||
|
+ char buf[4096], *copy = NULL;
|
||||||
|
+
|
||||||
|
+ if (!length)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ fd = open(path, O_RDONLY | O_CLOEXEC);
|
||||||
|
+ if (fd < 0)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ *length = 0;
|
||||||
|
+ for (;;) {
|
||||||
|
+ int n;
|
||||||
|
+
|
||||||
|
+ n = read(fd, buf, sizeof(buf));
|
||||||
|
+ if (n < 0)
|
||||||
|
+ goto error;
|
||||||
|
+ if (!n)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ copy = must_realloc(copy, (*length + n) * sizeof(*copy));
|
||||||
|
+ memcpy(copy + *length, buf, n);
|
||||||
|
+ *length += n;
|
||||||
|
+ }
|
||||||
|
+ close(fd);
|
||||||
|
+ return copy;
|
||||||
|
+
|
||||||
|
+error:
|
||||||
|
+ close(fd);
|
||||||
|
+ free(copy);
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * A poor-man's version of "xargs -0". Basically parses a given block of
|
||||||
|
+ * NUL-delimited data, within the given length and adds a pointer to each entry
|
||||||
|
+ * to the array of pointers.
|
||||||
|
+ */
|
||||||
|
+static int parse_xargs(char *data, int data_length, char ***output)
|
||||||
|
+{
|
||||||
|
+ int num = 0;
|
||||||
|
+ char *cur = data;
|
||||||
|
+
|
||||||
|
+ if (!data || *output != NULL)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ while (cur < data + data_length) {
|
||||||
|
+ num++;
|
||||||
|
+ *output = must_realloc(*output, (num + 1) * sizeof(**output));
|
||||||
|
+ (*output)[num - 1] = cur;
|
||||||
|
+ cur += strlen(cur) + 1;
|
||||||
|
+ }
|
||||||
|
+ (*output)[num] = NULL;
|
||||||
|
+ return num;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ.
|
||||||
|
+ * This is necessary because we are running in a context where we don't have a
|
||||||
|
+ * main() that we can just get the arguments from.
|
||||||
|
+ */
|
||||||
|
+static int fetchve(char ***argv, char ***envp)
|
||||||
|
+{
|
||||||
|
+ char *cmdline = NULL, *environ = NULL;
|
||||||
|
+ size_t cmdline_size, environ_size;
|
||||||
|
+
|
||||||
|
+ cmdline = read_file("/proc/self/cmdline", &cmdline_size);
|
||||||
|
+ if (!cmdline)
|
||||||
|
+ goto error;
|
||||||
|
+ environ = read_file("/proc/self/environ", &environ_size);
|
||||||
|
+ if (!environ)
|
||||||
|
+ goto error;
|
||||||
|
+
|
||||||
|
+ if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
|
||||||
|
+ goto error;
|
||||||
|
+ if (parse_xargs(environ, environ_size, envp) <= 0)
|
||||||
|
+ goto error;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+error:
|
||||||
|
+ free(environ);
|
||||||
|
+ free(cmdline);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#define SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
|
||||||
|
+static int clone_binary(void)
|
||||||
|
+{
|
||||||
|
+ int binfd, memfd, err;
|
||||||
|
+ ssize_t sent = 0;
|
||||||
|
+
|
||||||
|
+ memfd = memfd_create(OUR_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
||||||
|
+ if (memfd < 0)
|
||||||
|
+ return -ENOTRECOVERABLE;
|
||||||
|
+
|
||||||
|
+ binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
|
||||||
|
+ if (binfd < 0)
|
||||||
|
+ goto error;
|
||||||
|
+
|
||||||
|
+ sent = sendfile(memfd, binfd, NULL, SENDFILE_MAX);
|
||||||
|
+ close(binfd);
|
||||||
|
+ if (sent < 0)
|
||||||
|
+ goto error;
|
||||||
|
+
|
||||||
|
+ err = fcntl(memfd, F_ADD_SEALS, OUR_MEMFD_SEALS);
|
||||||
|
+ if (err < 0)
|
||||||
|
+ goto error;
|
||||||
|
+
|
||||||
|
+ return memfd;
|
||||||
|
+
|
||||||
|
+error:
|
||||||
|
+ close(memfd);
|
||||||
|
+ return -EIO;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int ensure_cloned_binary(void)
|
||||||
|
+{
|
||||||
|
+ int execfd;
|
||||||
|
+ char **argv = NULL, **envp = NULL;
|
||||||
|
+
|
||||||
|
+ /* Check that we're not self-cloned, and if we are then bail. */
|
||||||
|
+ int cloned = is_self_cloned();
|
||||||
|
+ if (cloned > 0 || cloned == -ENOTRECOVERABLE)
|
||||||
|
+ return cloned;
|
||||||
|
+
|
||||||
|
+ if (fetchve(&argv, &envp) < 0)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+ execfd = clone_binary();
|
||||||
|
+ if (execfd < 0)
|
||||||
|
+ return -EIO;
|
||||||
|
+
|
||||||
|
+ fexecve(execfd, argv, envp);
|
||||||
|
+ return -ENOEXEC;
|
||||||
|
+}
|
||||||
|
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
|
||||||
|
index cb224314..784fd9b0 100644
|
||||||
|
--- a/libcontainer/nsenter/nsexec.c
|
||||||
|
+++ b/libcontainer/nsenter/nsexec.c
|
||||||
|
@@ -528,6 +528,9 @@ void join_namespaces(char *nslist)
|
||||||
|
free(namespaces);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Defined in cloned_binary.c. */
|
||||||
|
+int ensure_cloned_binary(void);
|
||||||
|
+
|
||||||
|
void nsexec(void)
|
||||||
|
{
|
||||||
|
int pipenum;
|
||||||
|
@@ -543,6 +546,14 @@ void nsexec(void)
|
||||||
|
if (pipenum == -1)
|
||||||
|
return;
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * We need to re-exec if we are not in a cloned binary. This is necessary
|
||||||
|
+ * to ensure that containers won't be able to access the host binary
|
||||||
|
+ * through /proc/self/exe. See CVE-2019-5736.
|
||||||
|
+ */
|
||||||
|
+ if (ensure_cloned_binary() < 0)
|
||||||
|
+ bail("could not ensure we are a cloned binary");
|
||||||
|
+
|
||||||
|
/* Parse all of the netlink configuration. */
|
||||||
|
nl_parse(pipenum, &config);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.20.1
|
||||||
|
|
@ -1,540 +0,0 @@
|
|||||||
From 2dd156b190c02476191fc2522f9b0e0a1a098608 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
|
||||||
Date: Mon, 17 May 2021 16:11:35 -0700
|
|
||||||
Subject: [PATCH] rootfs: add mount destination validation
|
|
||||||
|
|
||||||
This is a manual backport of fix for CVE-2021-30465 to runc-1.0.0-rc10
|
|
||||||
(aka -rc90), upstream commit 84c14b43fa703db7 by Aleksa Sarai.
|
|
||||||
|
|
||||||
Original description follows.
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
Because the target of a mount is inside a container (which may be a
|
|
||||||
volume that is shared with another container), there exists a race
|
|
||||||
condition where the target of the mount may change to a path containing
|
|
||||||
a symlink after we have sanitised the path -- resulting in us
|
|
||||||
inadvertently mounting the path outside of the container.
|
|
||||||
|
|
||||||
This is not immediately useful because we are in a mount namespace with
|
|
||||||
MS_SLAVE mount propagation applied to "/", so we cannot mount on top of
|
|
||||||
host paths in the host namespace. However, if any subsequent mountpoints
|
|
||||||
in the configuration use a subdirectory of that host path as a source,
|
|
||||||
those subsequent mounts will use an attacker-controlled source path
|
|
||||||
(resolved within the host rootfs) -- allowing the bind-mounting of "/"
|
|
||||||
into the container.
|
|
||||||
|
|
||||||
While arguably configuration issues like this are not entirely within
|
|
||||||
runc's threat model, within the context of Kubernetes (and possibly
|
|
||||||
other container managers that provide semi-arbitrary container creation
|
|
||||||
privileges to untrusted users) this is a legitimate issue. Since we
|
|
||||||
cannot block mounting from the host into the container, we need to block
|
|
||||||
the first stage of this attack (mounting onto a path outside the
|
|
||||||
container).
|
|
||||||
|
|
||||||
The long-term plan to solve this would be to migrate to libpathrs, but
|
|
||||||
as a stop-gap we implement libpathrs-like path verification through
|
|
||||||
readlink(/proc/self/fd/$n) and then do mount operations through the
|
|
||||||
procfd once it's been verified to be inside the container. The target
|
|
||||||
could move after we've checked it, but if it is inside the container
|
|
||||||
then we can assume that it is safe for the same reason that libpathrs
|
|
||||||
operations would be safe.
|
|
||||||
|
|
||||||
A slight wrinkle is the "copyup" functionality we provide for tmpfs,
|
|
||||||
which is the only case where we want to do a mount on the host
|
|
||||||
filesystem. To facilitate this, I split out the copy-up functionality
|
|
||||||
entirely so that the logic isn't interspersed with the regular tmpfs
|
|
||||||
logic. In addition, all dependencies on m.Destination being overwritten
|
|
||||||
have been removed since that pattern was just begging to be a source of
|
|
||||||
more mount-target bugs (we do still have to modify m.Destination for
|
|
||||||
tmpfs-copyup but we only do it temporarily).
|
|
||||||
|
|
||||||
Fixes: CVE-2021-30465
|
|
||||||
Reported-by: Etienne Champetier <champetier.etienne@gmail.com>
|
|
||||||
Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com>
|
|
||||||
Reviewed-by: Samuel Karp <skarp@amazon.com>
|
|
||||||
Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
|
||||||
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
|
||||||
|
|
||||||
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
|
||||||
---
|
|
||||||
libcontainer/rootfs_linux.go | 225 ++++++++++++++++---------------
|
|
||||||
libcontainer/utils/utils.go | 54 ++++++++
|
|
||||||
libcontainer/utils/utils_test.go | 35 +++++
|
|
||||||
3 files changed, 204 insertions(+), 110 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
|
||||||
index 106c4c2b..fe9afe48 100644
|
|
||||||
--- a/libcontainer/rootfs_linux.go
|
|
||||||
+++ b/libcontainer/rootfs_linux.go
|
|
||||||
@@ -19,8 +19,9 @@ import (
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/mount"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
|
||||||
- libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
+ "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
"github.com/opencontainers/selinux/go-selinux/label"
|
|
||||||
+ "github.com/sirupsen/logrus"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
@@ -30,7 +31,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
|
||||||
// needsSetupDev returns true if /dev needs to be set up.
|
|
||||||
func needsSetupDev(config *configs.Config) bool {
|
|
||||||
for _, m := range config.Mounts {
|
|
||||||
- if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
|
||||||
+ if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -131,7 +132,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
|
||||||
func finalizeRootfs(config *configs.Config) (err error) {
|
|
||||||
// remount dev as ro if specified
|
|
||||||
for _, m := range config.Mounts {
|
|
||||||
- if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
|
||||||
+ if utils.CleanPath(m.Destination) == "/dev" {
|
|
||||||
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
|
||||||
if err := remountReadonly(m); err != nil {
|
|
||||||
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
|
||||||
@@ -200,8 +201,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
|
||||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- // update the mount with the correct dest after symlinks are resolved.
|
|
||||||
- m.Destination = dest
|
|
||||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -238,18 +237,21 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|
||||||
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- flags := defaultMountFlags
|
|
||||||
- if m.Flags&unix.MS_RDONLY != 0 {
|
|
||||||
- flags = flags | unix.MS_RDONLY
|
|
||||||
- }
|
|
||||||
- cgroupmount := &configs.Mount{
|
|
||||||
- Source: "cgroup",
|
|
||||||
- Device: "cgroup",
|
|
||||||
- Destination: subsystemPath,
|
|
||||||
- Flags: flags,
|
|
||||||
- Data: filepath.Base(subsystemPath),
|
|
||||||
- }
|
|
||||||
- if err := mountNewCgroup(cgroupmount); err != nil {
|
|
||||||
+ if err := utils.WithProcfd(rootfs, b.Destination, func(procfd string) error {
|
|
||||||
+ flags := defaultMountFlags
|
|
||||||
+ if m.Flags&unix.MS_RDONLY != 0 {
|
|
||||||
+ flags = flags | unix.MS_RDONLY
|
|
||||||
+ }
|
|
||||||
+ var (
|
|
||||||
+ source = "cgroup"
|
|
||||||
+ data = filepath.Base(subsystemPath)
|
|
||||||
+ )
|
|
||||||
+ if data == "systemd" {
|
|
||||||
+ data = cgroups.CgroupNamePrefix + data
|
|
||||||
+ source = "systemd"
|
|
||||||
+ }
|
|
||||||
+ return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
|
||||||
+ }); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
@@ -279,22 +281,67 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|
||||||
if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
|
||||||
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
|
||||||
- if err == unix.EPERM || err == unix.EBUSY {
|
|
||||||
- return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
|
||||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
|
||||||
+ if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
|
||||||
+ // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
|
||||||
+ if err == unix.EPERM || err == unix.EBUSY {
|
|
||||||
+ return unix.Mount("/sys/fs/cgroup", procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
|
||||||
+ }
|
|
||||||
+ return err
|
|
||||||
}
|
|
||||||
+ return nil
|
|
||||||
+ })
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
|
||||||
+ // Set up a scratch dir for the tmpfs on the host.
|
|
||||||
+ tmpdir, err := prepareTmp("/tmp")
|
|
||||||
+ if err != nil {
|
|
||||||
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
|
||||||
+ }
|
|
||||||
+ defer cleanupTmp(tmpdir)
|
|
||||||
+ tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
|
||||||
+ if err != nil {
|
|
||||||
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
|
||||||
+ }
|
|
||||||
+ defer os.RemoveAll(tmpDir)
|
|
||||||
+
|
|
||||||
+ // Configure the *host* tmpdir as if it's the container mount. We change
|
|
||||||
+ // m.Destination since we are going to mount *on the host*.
|
|
||||||
+ oldDest := m.Destination
|
|
||||||
+ m.Destination = tmpDir
|
|
||||||
+ err = mountPropagate(m, "/", mountLabel)
|
|
||||||
+ m.Destination = oldDest
|
|
||||||
+ if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- return nil
|
|
||||||
+ defer func() {
|
|
||||||
+ if Err != nil {
|
|
||||||
+ if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
|
||||||
+ logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }()
|
|
||||||
+
|
|
||||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
|
|
||||||
+ // Copy the container data to the host tmpdir. We append "/" to force
|
|
||||||
+ // CopyDirectory to resolve the symlink rather than trying to copy the
|
|
||||||
+ // symlink itself.
|
|
||||||
+ if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
|
|
||||||
+ return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %v", m.Destination, procfd, tmpDir, err)
|
|
||||||
+ }
|
|
||||||
+ // Now move the mount into the container.
|
|
||||||
+ if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
|
||||||
+ return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %v", tmpDir, procfd, m.Destination, err)
|
|
||||||
+ }
|
|
||||||
+ return nil
|
|
||||||
+ })
|
|
||||||
}
|
|
||||||
|
|
||||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
|
||||||
- var (
|
|
||||||
- dest = m.Destination
|
|
||||||
- )
|
|
||||||
- if !strings.HasPrefix(dest, rootfs) {
|
|
||||||
- dest = filepath.Join(rootfs, dest)
|
|
||||||
+ dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
}
|
|
||||||
|
|
||||||
switch m.Device {
|
|
||||||
@@ -329,46 +376,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
case "tmpfs":
|
|
||||||
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
|
||||||
- tmpDir := ""
|
|
||||||
stat, err := os.Stat(dest)
|
|
||||||
if err != nil {
|
|
||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- if copyUp {
|
|
||||||
- tmpdir, err := prepareTmp("/tmp")
|
|
||||||
- if err != nil {
|
|
||||||
- return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
|
||||||
- }
|
|
||||||
- defer cleanupTmp(tmpdir)
|
|
||||||
- tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
|
||||||
- if err != nil {
|
|
||||||
- return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
|
||||||
- }
|
|
||||||
- defer os.RemoveAll(tmpDir)
|
|
||||||
- m.Destination = tmpDir
|
|
||||||
+
|
|
||||||
+ if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
|
||||||
+ err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
|
||||||
+ } else {
|
|
||||||
+ err = mountPropagate(m, rootfs, mountLabel)
|
|
||||||
}
|
|
||||||
- if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
|
||||||
+ if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- if copyUp {
|
|
||||||
- if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
|
||||||
- errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
|
||||||
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
|
||||||
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
|
||||||
- }
|
|
||||||
- return errMsg
|
|
||||||
- }
|
|
||||||
- if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
|
||||||
- errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
|
||||||
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
|
||||||
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
|
||||||
- }
|
|
||||||
- return errMsg
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
if stat != nil {
|
|
||||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
|
||||||
return err
|
|
||||||
@@ -424,19 +446,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
- // ensure that the destination of the mount is resolved of symlinks at mount time because
|
|
||||||
- // any previous mounts can invalidate the next mount's destination.
|
|
||||||
- // this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
|
||||||
- // evil stuff to try to escape the container's rootfs.
|
|
||||||
- var err error
|
|
||||||
- if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
|
||||||
- return err
|
|
||||||
- }
|
|
||||||
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
- // update the mount with the correct dest after symlinks are resolved.
|
|
||||||
- m.Destination = dest
|
|
||||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -611,7 +623,7 @@ func createDevices(config *configs.Config) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
-func bindMountDeviceNode(dest string, node *configs.Device) error {
|
|
||||||
+func bindMountDeviceNode(rootfs, dest string, node *configs.Device) error {
|
|
||||||
f, err := os.Create(dest)
|
|
||||||
if err != nil && !os.IsExist(err) {
|
|
||||||
return err
|
|
||||||
@@ -619,24 +631,29 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
|
||||||
if f != nil {
|
|
||||||
f.Close()
|
|
||||||
}
|
|
||||||
- return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
|
||||||
+ return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
|
||||||
+ return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
|
||||||
+ })
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates the device node in the rootfs of the container.
|
|
||||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
|
||||||
- dest := filepath.Join(rootfs, node.Path)
|
|
||||||
+ dest, err := securejoin.SecureJoin(rootfs, node.Path)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if bind {
|
|
||||||
- return bindMountDeviceNode(dest, node)
|
|
||||||
+ return bindMountDeviceNode(rootfs, dest, node)
|
|
||||||
}
|
|
||||||
if err := mknodDevice(dest, node); err != nil {
|
|
||||||
if os.IsExist(err) {
|
|
||||||
return nil
|
|
||||||
} else if os.IsPermission(err) {
|
|
||||||
- return bindMountDeviceNode(dest, node)
|
|
||||||
+ return bindMountDeviceNode(rootfs, dest, node)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -955,55 +972,43 @@ func writeSystemProperty(key, value string) error {
|
|
||||||
}
|
|
||||||
|
|
||||||
func remount(m *configs.Mount, rootfs string) error {
|
|
||||||
- var (
|
|
||||||
- dest = m.Destination
|
|
||||||
- )
|
|
||||||
- if !strings.HasPrefix(dest, rootfs) {
|
|
||||||
- dest = filepath.Join(rootfs, dest)
|
|
||||||
- }
|
|
||||||
- return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
|
||||||
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
|
||||||
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
|
||||||
+ })
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do the mount operation followed by additional mounts required to take care
|
|
||||||
-// of propagation flags.
|
|
||||||
+// of propagation flags. This will always be scoped inside the container rootfs.
|
|
||||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
|
||||||
var (
|
|
||||||
- dest = m.Destination
|
|
||||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
|
||||||
flags = m.Flags
|
|
||||||
)
|
|
||||||
- if libcontainerUtils.CleanPath(dest) == "/dev" {
|
|
||||||
+ if utils.CleanPath(m.Destination) == "/dev" {
|
|
||||||
flags &= ^unix.MS_RDONLY
|
|
||||||
}
|
|
||||||
|
|
||||||
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
|
||||||
- if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
|
||||||
- dest = filepath.Join(rootfs, dest)
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
|
||||||
- return err
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- for _, pflag := range m.PropagationFlags {
|
|
||||||
- if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
|
||||||
- return err
|
|
||||||
+ // Because the destination is inside a container path which might be
|
|
||||||
+ // mutating underneath us, we verify that we are actually going to mount
|
|
||||||
+ // inside the container with WithProcfd() -- mounting through a procfd
|
|
||||||
+ // mounts on the target.
|
|
||||||
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
|
||||||
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
|
||||||
+ }); err != nil {
|
|
||||||
+ return fmt.Errorf("mount through procfd: %v", err)
|
|
||||||
+ }
|
|
||||||
+ // We have to apply mount propagation flags in a separate WithProcfd() call
|
|
||||||
+ // because the previous call invalidates the passed procfd -- the mount
|
|
||||||
+ // target needs to be re-opened.
|
|
||||||
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
|
||||||
+ for _, pflag := range m.PropagationFlags {
|
|
||||||
+ if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
- }
|
|
||||||
- return nil
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-func mountNewCgroup(m *configs.Mount) error {
|
|
||||||
- var (
|
|
||||||
- data = m.Data
|
|
||||||
- source = m.Source
|
|
||||||
- )
|
|
||||||
- if data == "systemd" {
|
|
||||||
- data = cgroups.CgroupNamePrefix + data
|
|
||||||
- source = "systemd"
|
|
||||||
- }
|
|
||||||
- if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
|
||||||
- return err
|
|
||||||
+ return nil
|
|
||||||
+ }); err != nil {
|
|
||||||
+ return fmt.Errorf("change mount propagation through procfd: %v", err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
|
|
||||||
index 40ccfaa1..c1418ef9 100644
|
|
||||||
--- a/libcontainer/utils/utils.go
|
|
||||||
+++ b/libcontainer/utils/utils.go
|
|
||||||
@@ -2,12 +2,15 @@ package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
+ "fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
+ "strconv"
|
|
||||||
"strings"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
+ securejoin "github.com/cyphar/filepath-securejoin"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
@@ -73,6 +76,57 @@ func CleanPath(path string) string {
|
|
||||||
return filepath.Clean(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
+// stripRoot returns the passed path, stripping the root path if it was
|
|
||||||
+// (lexically) inside it. Note that both passed paths will always be treated
|
|
||||||
+// as absolute, and the returned path will also always be absolute. In
|
|
||||||
+// addition, the paths are cleaned before stripping the root.
|
|
||||||
+func stripRoot(root, path string) string {
|
|
||||||
+ // Make the paths clean and absolute.
|
|
||||||
+ root, path = CleanPath("/"+root), CleanPath("/"+path)
|
|
||||||
+ switch {
|
|
||||||
+ case path == root:
|
|
||||||
+ path = "/"
|
|
||||||
+ case root == "/":
|
|
||||||
+ // do nothing
|
|
||||||
+ case strings.HasPrefix(path, root+"/"):
|
|
||||||
+ path = strings.TrimPrefix(path, root+"/")
|
|
||||||
+ }
|
|
||||||
+ return CleanPath("/" + path)
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
|
||||||
+// corresponding to the unsafePath resolved within the root. Before passing the
|
|
||||||
+// fd, this path is verified to have been inside the root -- so operating on it
|
|
||||||
+// through the passed fdpath should be safe. Do not access this path through
|
|
||||||
+// the original path strings, and do not attempt to use the pathname outside of
|
|
||||||
+// the passed closure (the file handle will be freed once the closure returns).
|
|
||||||
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
|
||||||
+ // Remove the root then forcefully resolve inside the root.
|
|
||||||
+ unsafePath = stripRoot(root, unsafePath)
|
|
||||||
+ path, err := securejoin.SecureJoin(root, unsafePath)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return fmt.Errorf("resolving path inside rootfs failed: %v", err)
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // Open the target path.
|
|
||||||
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return fmt.Errorf("open o_path procfd: %v", err)
|
|
||||||
+ }
|
|
||||||
+ defer fh.Close()
|
|
||||||
+
|
|
||||||
+ // Double-check the path is the one we expected.
|
|
||||||
+ procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
|
||||||
+ if realpath, err := os.Readlink(procfd); err != nil {
|
|
||||||
+ return fmt.Errorf("procfd verification failed: %v", err)
|
|
||||||
+ } else if realpath != path {
|
|
||||||
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // Run the closure.
|
|
||||||
+ return fn(procfd)
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
|
||||||
// returns the corresponding value. The pairs must be separated with '='.
|
|
||||||
func SearchLabels(labels []string, query string) string {
|
|
||||||
diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go
|
|
||||||
index 395eedcf..5b80cac6 100644
|
|
||||||
--- a/libcontainer/utils/utils_test.go
|
|
||||||
+++ b/libcontainer/utils/utils_test.go
|
|
||||||
@@ -140,3 +140,38 @@ func TestCleanPath(t *testing.T) {
|
|
||||||
t.Errorf("expected to receive '/foo' and received %s", path)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+func TestStripRoot(t *testing.T) {
|
|
||||||
+ for _, test := range []struct {
|
|
||||||
+ root, path, out string
|
|
||||||
+ }{
|
|
||||||
+ // Works with multiple components.
|
|
||||||
+ {"/a/b", "/a/b/c", "/c"},
|
|
||||||
+ {"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"},
|
|
||||||
+ // '/' must be a no-op.
|
|
||||||
+ {"/", "/a/b/c", "/a/b/c"},
|
|
||||||
+ // Must be the correct order.
|
|
||||||
+ {"/a/b", "/a/c/b", "/a/c/b"},
|
|
||||||
+ // Must be at start.
|
|
||||||
+ {"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"},
|
|
||||||
+ // Must be a lexical parent.
|
|
||||||
+ {"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"},
|
|
||||||
+ // Must only strip the root once.
|
|
||||||
+ {"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"},
|
|
||||||
+ // Deal with .. in a fairly sane way.
|
|
||||||
+ {"/foo/bar", "/foo/bar/../baz", "/foo/baz"},
|
|
||||||
+ {"/foo/bar", "../../../../../../foo/bar/baz", "/baz"},
|
|
||||||
+ {"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"},
|
|
||||||
+ {"/foo/bar/../baz", "/foo/baz/bar", "/bar"},
|
|
||||||
+ {"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"},
|
|
||||||
+ // All paths are made absolute before stripping.
|
|
||||||
+ {"foo/bar", "/foo/bar/baz/bee", "/baz/bee"},
|
|
||||||
+ {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
|
|
||||||
+ {"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
|
|
||||||
+ } {
|
|
||||||
+ got := stripRoot(test.root, test.path)
|
|
||||||
+ if got != test.out {
|
|
||||||
+ t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,178 +1,168 @@
|
|||||||
From 3d99c51e1b38a440804a55c9f314f62cc50b8902 Mon Sep 17 00:00:00 2001
|
From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001
|
||||||
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
Date: Fri, 25 May 2018 18:04:06 +0200
|
Date: Fri, 25 May 2018 18:04:06 +0200
|
||||||
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
|
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
|
||||||
|
|
||||||
if NOTIFY_SOCKET is used, do not block the main runc process waiting
|
if NOTIFY_SOCKET is used, do not block the main runc process waiting
|
||||||
for events on the notify socket. Bind mount the parent directory of
|
for events on the notify socket. Change the logic to create a new
|
||||||
the notify socket, so that "start" can create the socket and it is
|
process that monitors exclusively the notify socket until an event is
|
||||||
still accessible from the container.
|
received.
|
||||||
|
|
||||||
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
---
|
---
|
||||||
notify_socket.go | 112 ++++++++++++++++++++++++++++++++++-------------
|
init.go | 12 +++++++
|
||||||
signals.go | 4 +-
|
notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++---------
|
||||||
start.go | 13 +++++-
|
signals.go | 5 +--
|
||||||
utils_linux.go | 12 ++++-
|
3 files changed, 99 insertions(+), 19 deletions(-)
|
||||||
4 files changed, 105 insertions(+), 36 deletions(-)
|
|
||||||
|
|
||||||
|
diff --git a/init.go b/init.go
|
||||||
|
index c8f453192..6a3d9e91c 100644
|
||||||
|
--- a/init.go
|
||||||
|
+++ b/init.go
|
||||||
|
@@ -20,6 +20,18 @@ var initCommand = cli.Command{
|
||||||
|
Name: "init",
|
||||||
|
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
|
||||||
|
Action: func(context *cli.Context) error {
|
||||||
|
+ // If NOTIFY_SOCKET is used create a new process that stays around
|
||||||
|
+ // so as not to block "runc start". It will automatically exit when the
|
||||||
|
+ // container notifies that it is ready, or when the container is deleted
|
||||||
|
+ if os.Getenv("_NOTIFY_SOCKET_FD") != "" {
|
||||||
|
+ fd := os.Getenv("_NOTIFY_SOCKET_FD")
|
||||||
|
+ pid := os.Getenv("_NOTIFY_SOCKET_PID")
|
||||||
|
+ hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST")
|
||||||
|
+ notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH")
|
||||||
|
+ notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath)
|
||||||
|
+ os.Exit(0)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
factory, _ := libcontainer.New("")
|
||||||
|
if err := factory.StartInitialization(); err != nil {
|
||||||
|
// as the error is sent back to the parent there is no need to log
|
||||||
diff --git a/notify_socket.go b/notify_socket.go
|
diff --git a/notify_socket.go b/notify_socket.go
|
||||||
index e7453c62..d961453a 100644
|
index cd6c0a989..e04e9d660 100644
|
||||||
--- a/notify_socket.go
|
--- a/notify_socket.go
|
||||||
+++ b/notify_socket.go
|
+++ b/notify_socket.go
|
||||||
@@ -7,11 +7,13 @@ import (
|
@@ -6,10 +6,13 @@ import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
+ "os"
|
||||||
+ "path"
|
+ "os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
+ "strconv"
|
+ "strconv"
|
||||||
+ "time"
|
+ "time"
|
||||||
|
|
||||||
+ "github.com/opencontainers/runc/libcontainer"
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
-
|
-
|
||||||
- "github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
)
|
)
|
||||||
|
@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error {
|
||||||
@@ -27,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
|
|
||||||
}
|
|
||||||
|
|
||||||
root := filepath.Join(context.GlobalString("root"), id)
|
|
||||||
- path := filepath.Join(root, "notify.sock")
|
|
||||||
+ socketPath := filepath.Join(root, "notify", "notify.sock")
|
|
||||||
|
|
||||||
notifySocket := ¬ifySocket{
|
|
||||||
socket: nil,
|
|
||||||
host: notifySocketHost,
|
|
||||||
- socketPath: path,
|
|
||||||
+ socketPath: socketPath,
|
|
||||||
}
|
|
||||||
|
|
||||||
return notifySocket
|
|
||||||
@@ -44,13 +46,19 @@ func (s *notifySocket) Close() error {
|
|
||||||
|
|
||||||
// If systemd is supporting sd_notify protocol, this function will add support
|
|
||||||
// for sd_notify protocol from within the container.
|
|
||||||
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
|
|
||||||
- mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
|
|
||||||
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
|
|
||||||
+ pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
|
|
||||||
+ mount := specs.Mount{
|
|
||||||
+ Destination: path.Dir(pathInContainer),
|
|
||||||
+ Source: path.Dir(s.socketPath),
|
|
||||||
+ Options: []string{"bind", "nosuid", "noexec", "nodev", "ro"},
|
|
||||||
+ }
|
|
||||||
spec.Mounts = append(spec.Mounts, mount)
|
|
||||||
- spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
|
|
||||||
+ spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
|
|
||||||
+ return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
-func (s *notifySocket) setupSocket() error {
|
|
||||||
+func (s *notifySocket) bindSocket() error {
|
|
||||||
addr := net.UnixAddr{
|
|
||||||
Name: s.socketPath,
|
|
||||||
Net: "unixgram",
|
|
||||||
@@ -71,45 +79,89 @@ func (s *notifySocket) setupSocket() error {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
-// pid1 must be set only with -d, as it is used to set the new process as the main process
|
+func (notifySocket *notifySocket) notifyNewPid(pid int) {
|
||||||
-// for the service in systemd
|
+ notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
|
||||||
-func (s *notifySocket) run(pid1 int) {
|
+ client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
||||||
- buf := make([]byte, 512)
|
|
||||||
- notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
|
|
||||||
+func (s *notifySocket) setupSocketDirectory() error {
|
|
||||||
+ return os.Mkdir(path.Dir(s.socketPath), 0755)
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
|
|
||||||
+ notifySocket := newNotifySocket(context, notifySocketHost, id)
|
|
||||||
+ if notifySocket == nil {
|
|
||||||
+ return nil, nil
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if err := notifySocket.bindSocket(); err != nil {
|
|
||||||
+ return nil, err
|
|
||||||
+ }
|
|
||||||
+ return notifySocket, nil
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
|
|
||||||
+ s, err := container.State()
|
|
||||||
+ if err != nil {
|
+ if err != nil {
|
||||||
+ return err
|
+ return
|
||||||
+ }
|
+ }
|
||||||
+ return n.run(s.InitProcessPid)
|
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid)
|
||||||
|
+ client.Write([]byte(newPid))
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
+func (n *notifySocket) run(pid1 int) error {
|
// pid1 must be set only with -d, as it is used to set the new process as the main process
|
||||||
+ if n.socket == nil {
|
// for the service in systemd
|
||||||
+ return nil
|
func (notifySocket *notifySocket) run(pid1 int) {
|
||||||
+ }
|
- buf := make([]byte, 512)
|
||||||
+ notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
|
- notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
|
||||||
client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
- client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
||||||
|
+ file, err := notifySocket.socket.File()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
- logrus.Error(err)
|
logrus.Error(err)
|
||||||
- return
|
return
|
||||||
+ return err
|
|
||||||
}
|
}
|
||||||
- for {
|
- for {
|
||||||
- r, err := s.socket.Read(buf)
|
- r, err := notifySocket.socket.Read(buf)
|
||||||
- if err != nil {
|
- if err != nil {
|
||||||
- break
|
- break
|
||||||
|
+ defer file.Close()
|
||||||
|
+ defer notifySocket.socket.Close()
|
||||||
+
|
+
|
||||||
+ ticker := time.NewTicker(time.Millisecond * 100)
|
+ cmd := exec.Command("/proc/self/exe", "init")
|
||||||
+ defer ticker.Stop()
|
+ cmd.ExtraFiles = []*os.File{file}
|
||||||
|
+ cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3",
|
||||||
|
+ fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1),
|
||||||
|
+ fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host),
|
||||||
|
+ fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath))
|
||||||
|
+
|
||||||
|
+ if err := cmd.Start(); err != nil {
|
||||||
|
+ logrus.Fatal(err)
|
||||||
|
+ }
|
||||||
|
+ notifySocket.notifyNewPid(cmd.Process.Pid)
|
||||||
|
+ cmd.Process.Release()
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) {
|
||||||
|
+ intFd, err := strconv.Atoi(envFd)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+ pid1, err := strconv.Atoi(envPid)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ file := os.NewFile(uintptr(intFd), "unixgram")
|
||||||
|
+ defer file.Close()
|
||||||
+
|
+
|
||||||
+ fileChan := make(chan []byte)
|
+ fileChan := make(chan []byte)
|
||||||
|
+ exitChan := make(chan bool)
|
||||||
|
+
|
||||||
+ go func() {
|
+ go func() {
|
||||||
+ for {
|
+ for {
|
||||||
+ buf := make([]byte, 512)
|
+ buf := make([]byte, 512)
|
||||||
+ r, err := n.socket.Read(buf)
|
+ r, err := file.Read(buf)
|
||||||
+ if err != nil {
|
+ if err != nil {
|
||||||
+ return
|
+ return
|
||||||
+ }
|
+ }
|
||||||
+ got := buf[0:r]
|
+ fileChan <- buf[0:r]
|
||||||
+ if !bytes.HasPrefix(got, []byte("READY=")) {
|
|
||||||
+ continue
|
|
||||||
+ }
|
|
||||||
+ fileChan <- got
|
|
||||||
+ return
|
|
||||||
}
|
}
|
||||||
- var out bytes.Buffer
|
- var out bytes.Buffer
|
||||||
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
||||||
- if bytes.HasPrefix(line, []byte("READY=")) {
|
- if bytes.HasPrefix(line, []byte("READY=")) {
|
||||||
+ }()
|
+ }()
|
||||||
|
+ go func() {
|
||||||
|
+ for {
|
||||||
|
+ if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) {
|
||||||
|
+ exitChan <- true
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
|
+ time.Sleep(time.Second)
|
||||||
|
+ }
|
||||||
|
+ }()
|
||||||
|
+
|
||||||
|
+ notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"}
|
||||||
|
+ client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return
|
||||||
|
+ }
|
||||||
+
|
+
|
||||||
+ for {
|
+ for {
|
||||||
+ select {
|
+ select {
|
||||||
+ case <-ticker.C:
|
+ case <-exitChan:
|
||||||
+ _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
|
+ return
|
||||||
+ if err != nil {
|
|
||||||
+ return nil
|
|
||||||
+ }
|
|
||||||
+ case b := <-fileChan:
|
+ case b := <-fileChan:
|
||||||
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
|
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
|
||||||
|
+ if !bytes.HasPrefix(line, []byte("READY=")) {
|
||||||
|
+ continue
|
||||||
|
+ }
|
||||||
|
+
|
||||||
+ var out bytes.Buffer
|
+ var out bytes.Buffer
|
||||||
_, err = out.Write(line)
|
_, err = out.Write(line)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
- return
|
return
|
||||||
+ return err
|
@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) {
|
||||||
}
|
|
||||||
|
|
||||||
_, err = out.Write([]byte{'\n'})
|
|
||||||
if err != nil {
|
|
||||||
- return
|
|
||||||
+ return err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = client.Write(out.Bytes())
|
|
||||||
if err != nil {
|
|
||||||
- return
|
|
||||||
+ return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// now we can inform systemd to use pid1 as the pid to monitor
|
// now we can inform systemd to use pid1 as the pid to monitor
|
||||||
@ -180,26 +170,25 @@ index e7453c62..d961453a 100644
|
|||||||
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||||
- client.Write([]byte(newPid))
|
- client.Write([]byte(newPid))
|
||||||
- }
|
- }
|
||||||
- return
|
|
||||||
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||||
+ client.Write([]byte(newPid))
|
+ client.Write([]byte(newPid))
|
||||||
+ return nil
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
diff --git a/signals.go b/signals.go
|
diff --git a/signals.go b/signals.go
|
||||||
index b67f65a0..dd25e094 100644
|
index 1811de837..d0988cb39 100644
|
||||||
--- a/signals.go
|
--- a/signals.go
|
||||||
+++ b/signals.go
|
+++ b/signals.go
|
||||||
@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||||
h.notifySocket.run(pid1)
|
h.notifySocket.run(pid1)
|
||||||
return 0, nil
|
return 0, nil
|
||||||
|
} else {
|
||||||
|
- go h.notifySocket.run(0)
|
||||||
|
+ h.notifySocket.run(os.Getpid())
|
||||||
}
|
}
|
||||||
+ h.notifySocket.run(os.Getpid())
|
|
||||||
go h.notifySocket.run(0)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||||
// status because we must ensure that any of the go specific process
|
// status because we must ensure that any of the go specific process
|
||||||
// fun such as flushing pipes are complete before we return.
|
// fun such as flushing pipes are complete before we return.
|
||||||
process.Wait()
|
process.Wait()
|
||||||
@ -209,70 +198,3 @@ index b67f65a0..dd25e094 100644
|
|||||||
return e.status, nil
|
return e.status, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
diff --git a/start.go b/start.go
|
|
||||||
index 2bb698b2..3a1769a4 100644
|
|
||||||
--- a/start.go
|
|
||||||
+++ b/start.go
|
|
||||||
@@ -3,6 +3,7 @@ package main
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
+ "os"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer"
|
|
||||||
"github.com/urfave/cli"
|
|
||||||
@@ -31,7 +32,17 @@ your host.`,
|
|
||||||
}
|
|
||||||
switch status {
|
|
||||||
case libcontainer.Created:
|
|
||||||
- return container.Exec()
|
|
||||||
+ notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+ if err := container.Exec(); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+ if notifySocket != nil {
|
|
||||||
+ return notifySocket.waitForContainer(container)
|
|
||||||
+ }
|
|
||||||
+ return nil
|
|
||||||
case libcontainer.Stopped:
|
|
||||||
return errors.New("cannot start a container that has stopped")
|
|
||||||
case libcontainer.Running:
|
|
||||||
diff --git a/utils_linux.go b/utils_linux.go
|
|
||||||
index 984e6b0f..46c26246 100644
|
|
||||||
--- a/utils_linux.go
|
|
||||||
+++ b/utils_linux.go
|
|
||||||
@@ -408,7 +408,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
|
||||||
|
|
||||||
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
|
|
||||||
if notifySocket != nil {
|
|
||||||
- notifySocket.setupSpec(context, spec)
|
|
||||||
+ if err := notifySocket.setupSpec(context, spec); err != nil {
|
|
||||||
+ return -1, err
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
container, err := createContainer(context, id, spec)
|
|
||||||
@@ -417,10 +419,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
|
||||||
}
|
|
||||||
|
|
||||||
if notifySocket != nil {
|
|
||||||
- err := notifySocket.setupSocket()
|
|
||||||
+ err := notifySocket.setupSocketDirectory()
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
+ if action == CT_ACT_RUN {
|
|
||||||
+ err := notifySocket.bindSocket()
|
|
||||||
+ if err != nil {
|
|
||||||
+ return -1, err
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Support on-demand socket activation by passing file descriptors into the container init process.
|
|
||||||
--
|
|
||||||
2.21.0
|
|
||||||
|
|
||||||
|
1
SOURCES/99-containers.conf
Normal file
1
SOURCES/99-containers.conf
Normal file
@ -0,0 +1 @@
|
|||||||
|
fs.may_detach_mounts=1
|
61
SOURCES/change-default-root.patch
Normal file
61
SOURCES/change-default-root.patch
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
diff --git a/list.go b/list.go
|
||||||
|
index 0313d8c..328798b 100644
|
||||||
|
--- a/list.go
|
||||||
|
+++ b/list.go
|
||||||
|
@@ -50,7 +50,7 @@ var listCommand = cli.Command{
|
||||||
|
ArgsUsage: `
|
||||||
|
|
||||||
|
Where the given root is specified via the global option "--root"
|
||||||
|
-(default: "/run/runc").
|
||||||
|
+(default: "/run/runc-ctrs").
|
||||||
|
|
||||||
|
EXAMPLE 1:
|
||||||
|
To list containers created via the default "--root":
|
||||||
|
diff --git a/main.go b/main.go
|
||||||
|
index 278399a..0f49fce 100644
|
||||||
|
--- a/main.go
|
||||||
|
+++ b/main.go
|
||||||
|
@@ -62,7 +62,7 @@ func main() {
|
||||||
|
v = append(v, fmt.Sprintf("spec: %s", specs.Version))
|
||||||
|
app.Version = strings.Join(v, "\n")
|
||||||
|
|
||||||
|
- root := "/run/runc"
|
||||||
|
+ root := "/run/runc-ctrs"
|
||||||
|
rootless, err := isRootless(nil)
|
||||||
|
if err != nil {
|
||||||
|
fatal(err)
|
||||||
|
@@ -70,7 +70,7 @@ func main() {
|
||||||
|
if rootless {
|
||||||
|
runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
|
||||||
|
if runtimeDir != "" {
|
||||||
|
- root = runtimeDir + "/runc"
|
||||||
|
+ root = runtimeDir + "/runc-ctrs"
|
||||||
|
// According to the XDG specification, we need to set anything in
|
||||||
|
// XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get
|
||||||
|
// auto-pruned.
|
||||||
|
diff --git a/man/runc-list.8.md b/man/runc-list.8.md
|
||||||
|
index f737424..107220e 100644
|
||||||
|
--- a/man/runc-list.8.md
|
||||||
|
+++ b/man/runc-list.8.md
|
||||||
|
@@ -6,7 +6,7 @@
|
||||||
|
|
||||||
|
# EXAMPLE
|
||||||
|
Where the given root is specified via the global option "--root"
|
||||||
|
-(default: "/run/runc").
|
||||||
|
+(default: "/run/runc-ctrs").
|
||||||
|
|
||||||
|
To list containers created via the default "--root":
|
||||||
|
# runc list
|
||||||
|
diff --git a/man/runc.8.md b/man/runc.8.md
|
||||||
|
index 6d0ddff..337bc73 100644
|
||||||
|
--- a/man/runc.8.md
|
||||||
|
+++ b/man/runc.8.md
|
||||||
|
@@ -51,7 +51,7 @@ value for "bundle" is the current directory.
|
||||||
|
--debug enable debug output for logging
|
||||||
|
--log value set the log file path where internal debug information is written (default: "/dev/null")
|
||||||
|
--log-format value set the format used by logs ('text' (default), or 'json') (default: "text")
|
||||||
|
- --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers)
|
||||||
|
+ --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc-ctrs" or $XDG_RUNTIME_DIR/runc-ctrs for rootless containers)
|
||||||
|
--criu value path to the criu binary used for checkpoint and restore (default: "criu")
|
||||||
|
--systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234"
|
||||||
|
--rootless value enable rootless mode ('true', 'false', or 'auto') (default: "auto")
|
72
SOURCES/pivot-root.patch
Normal file
72
SOURCES/pivot-root.patch
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
From 28a697cce3e4f905dca700eda81d681a30eef9cd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
|
Date: Fri, 11 Jan 2019 21:53:45 +0100
|
||||||
|
Subject: [PATCH] rootfs: umount all procfs and sysfs with --no-pivot
|
||||||
|
|
||||||
|
When creating a new user namespace, the kernel doesn't allow mounting
|
||||||
|
a new procfs or sysfs file system if there is not already one instance
|
||||||
|
fully visible in the current mount namespace.
|
||||||
|
|
||||||
|
When using --no-pivot we were effectively inhibiting this protection
|
||||||
|
from the kernel, as /proc and /sys from the host are still present in
|
||||||
|
the container mount namespace.
|
||||||
|
|
||||||
|
A container without full access to /proc could then create a new user
|
||||||
|
namespace, and from there be able to mount a fully visible /proc, bypassing
|
||||||
|
the limitations in the container.
|
||||||
|
|
||||||
|
A simple reproducer for this issue is:
|
||||||
|
|
||||||
|
unshare -mrfp sh -c "mount -t proc none /proc && echo c > /proc/sysrq-trigger"
|
||||||
|
|
||||||
|
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
|
---
|
||||||
|
libcontainer/rootfs_linux.go | 35 +++++++++++++++++++++++++++++++++++
|
||||||
|
1 file changed, 35 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||||||
|
index e7c2f8ada..6bd6da74a 100644
|
||||||
|
--- a/libcontainer/rootfs_linux.go
|
||||||
|
+++ b/libcontainer/rootfs_linux.go
|
||||||
|
@@ -748,6 +748,41 @@ func pivotRoot(rootfs string) error {
|
||||||
|
}
|
||||||
|
|
||||||
|
func msMoveRoot(rootfs string) error {
|
||||||
|
+ mountinfos, err := mount.GetMounts()
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ absRootfs, err := filepath.Abs(rootfs)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for _, info := range mountinfos {
|
||||||
|
+ p, err := filepath.Abs(info.Mountpoint)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ // Umount every sysfs and proc file system, except those under the container rootfs
|
||||||
|
+ if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
|
||||||
|
+ continue
|
||||||
|
+ }
|
||||||
|
+ // Be sure umount events are not propagated to the host.
|
||||||
|
+ if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
|
||||||
|
+ if err != unix.EINVAL && err != unix.EPERM {
|
||||||
|
+ return err
|
||||||
|
+ } else {
|
||||||
|
+ // If we do not have privileges for umounting (e.g. rootless), then
|
||||||
|
+ // cover the path.
|
||||||
|
+ if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
@ -12,34 +12,41 @@
|
|||||||
%if 0%{?rhel} > 7 && ! 0%{?fedora}
|
%if 0%{?rhel} > 7 && ! 0%{?fedora}
|
||||||
%define gobuild(o:) \
|
%define gobuild(o:) \
|
||||||
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
|
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
|
||||||
%endif
|
%endif # distro
|
||||||
|
|
||||||
%global provider github
|
%global provider github
|
||||||
%global provider_tld com
|
%global provider_tld com
|
||||||
%global project opencontainers
|
%global project opencontainers
|
||||||
%global repo runc
|
%global repo runc
|
||||||
# https://github.com/opencontainers/runc
|
# https://github.com/opencontainers/runc
|
||||||
%global import_path %{provider}.%{provider_tld}/%{project}/%{repo}
|
%global provider_prefix %{provider}.%{provider_tld}/%{project}/%{repo}
|
||||||
%global git0 https://%{import_path}
|
%global import_path %{provider_prefix}
|
||||||
%global commit0 dc9208a3303feef5b3839f4323d9beb36df0a9dd
|
%global git0 https://github.com/opencontainers/runc
|
||||||
|
%global commit0 2abd837c8c25b0102ac4ce14f17bc0bc7ddffba7
|
||||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||||
|
|
||||||
Name: %{repo}
|
Name: %{repo}
|
||||||
Version: 1.0.0
|
Version: 1.0.0
|
||||||
Release: 66.rc10%{?dist}
|
Release: 56.rc5.dev.git%{shortcommit0}%{?dist}
|
||||||
Summary: CLI for running Open Containers
|
Summary: CLI for running Open Containers
|
||||||
ExcludeArch: %{ix86}
|
ExcludeArch: %{ix86}
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
URL: %{git0}
|
URL: http://%{provider_prefix}
|
||||||
Source0: %{git0}/archive/%{commit0}/%{name}-%{shortcommit0}.tar.gz
|
Source0: %{git0}/archive/%{commit0}/%{repo}-%{shortcommit0}.tar.gz
|
||||||
Patch0: 1807.patch
|
Source1: 99-containers.conf
|
||||||
Patch1: 0001-rootfs-add-mount-destination-validation.patch
|
Patch0: change-default-root.patch
|
||||||
BuildRequires: golang >= 1.12.12-4
|
Patch1: 0001-Revert-Apply-cgroups-earlier.patch
|
||||||
|
Patch2: 1807.patch
|
||||||
|
Patch3: 0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
|
||||||
|
Patch4: pivot-root.patch
|
||||||
|
Requires: criu
|
||||||
|
Requires(pre): container-selinux >= 2:2.2-2
|
||||||
|
|
||||||
|
# If go_compiler is not set to 1, there is no virtual provide. Use golang instead.
|
||||||
|
BuildRequires: %{?go_compiler:compiler(go-compiler)}%{!?go_compiler:golang} >= 1.6.2
|
||||||
BuildRequires: git
|
BuildRequires: git
|
||||||
BuildRequires: go-md2man
|
BuildRequires: go-md2man
|
||||||
BuildRequires: libseccomp-devel
|
BuildRequires: libseccomp-devel
|
||||||
Requires: criu
|
|
||||||
Requires(pre): container-selinux >= 2:2.2-2
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
The runc command can be used to start containers which are packaged
|
The runc command can be used to start containers which are packaged
|
||||||
@ -58,10 +65,9 @@ pushd GOPATH
|
|||||||
popd
|
popd
|
||||||
|
|
||||||
pushd GOPATH/src/%{import_path}
|
pushd GOPATH/src/%{import_path}
|
||||||
export GO111MODULE=off
|
|
||||||
export GOPATH=%{gopath}:$(pwd)/GOPATH
|
export GOPATH=%{gopath}:$(pwd)/GOPATH
|
||||||
export BUILDTAGS="selinux seccomp"
|
export BUILDTAGS="selinux seccomp"
|
||||||
%gobuild -o %{name} %{import_path}
|
%gobuild -o %{name} %{import_path}
|
||||||
|
|
||||||
pushd man
|
pushd man
|
||||||
./md2man-all.sh
|
./md2man-all.sh
|
||||||
@ -91,45 +97,12 @@ install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash
|
|||||||
%{_datadir}/bash-completion/completions/%{name}
|
%{_datadir}/bash-completion/completions/%{name}
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-66.rc10
|
* Thu Nov 28 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
|
||||||
- set GO111MODULE=off to fix build
|
- rebuild because of CVE-2019-9512 and CVE-2019-9514
|
||||||
- Related: #1955651
|
- Resolves: #1766328, #1766300
|
||||||
|
|
||||||
* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-65.rc10
|
|
||||||
- fix CVE-2021-30465
|
|
||||||
- Resolves: #1955651
|
|
||||||
|
|
||||||
* Thu Feb 13 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-64.rc10
|
|
||||||
- address CVE-2019-19921 by updating to rc10
|
|
||||||
- Resolves: #1801888
|
|
||||||
|
|
||||||
* Wed Dec 11 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-63.rc9
|
|
||||||
- use no_openssl in BUILDTAGS (no vendored crypto in runc)
|
|
||||||
- Related: RHELPLAN-25139
|
|
||||||
|
|
||||||
* Mon Dec 09 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-62.rc9
|
|
||||||
- be sure to use golang >= 1.12.12-4
|
|
||||||
- Related: RHELPLAN-25139
|
|
||||||
|
|
||||||
* Thu Nov 21 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-61.rc9
|
|
||||||
- update to runc 1.0.0-rc9 release
|
|
||||||
- amend golang deps
|
|
||||||
- fixes CVE-2019-16884
|
|
||||||
- Resolves: #1759651
|
|
||||||
|
|
||||||
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-60.rc8
|
|
||||||
- Resolves: #1721247 - enable fips mode
|
|
||||||
|
|
||||||
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-59.rc8
|
|
||||||
- Resolves: #1720654 - rebase to v1.0.0-rc8
|
|
||||||
|
|
||||||
* Thu Apr 11 2019 Eduardo Santiago <santiago@redhat.com> - 1.0.0-57.rc5.dev.git2abd837
|
|
||||||
- Resolves: #1693424 - podman rootless: cannot specify gid= mount options
|
|
||||||
|
|
||||||
* Wed Feb 27 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
|
|
||||||
- change-default-root patch not needed as there's no docker on rhel8
|
|
||||||
|
|
||||||
* Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
|
* Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
|
||||||
|
- Resolves: #1665770 - rootfs: umount all procfs and sysfs with --no-pivot
|
||||||
- Resolves: CVE-2019-5736
|
- Resolves: CVE-2019-5736
|
||||||
|
|
||||||
* Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837
|
* Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837
|
||||||
|
Loading…
Reference in New Issue
Block a user