Compare commits
No commits in common. "c8-stream-1.0" and "c8-stream-2.0" have entirely different histories.
c8-stream-
...
c8-stream-
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
SOURCES/runc-2abd837.tar.gz
|
SOURCES/runc-dc9208a.tar.gz
|
||||||
|
@ -1 +1 @@
|
|||||||
cf7119a838db2963e7af6ecdba90a2cc95ec0d56 SOURCES/runc-2abd837.tar.gz
|
32859590dea35b77eed012c388d97fc12fdfdb93 SOURCES/runc-dc9208a.tar.gz
|
||||||
|
@ -1,62 +0,0 @@
|
|||||||
From dfb3496c174377b860b62872ce6af951364cc3ac Mon Sep 17 00:00:00 2001
|
|
||||||
From: Lokesh Mandvekar <lsm5@fedoraproject.org>
|
|
||||||
Date: Tue, 12 Dec 2017 13:22:42 +0530
|
|
||||||
Subject: [PATCH] Revert "Apply cgroups earlier"
|
|
||||||
|
|
||||||
This reverts commit 7062c7556b71188abc18d7516441ff4b03fbc1fc.
|
|
||||||
---
|
|
||||||
libcontainer/process_linux.go | 31 ++++++++++++++-----------------
|
|
||||||
1 file changed, 14 insertions(+), 17 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
|
|
||||||
index 149b1126..b8a395af 100644
|
|
||||||
--- a/libcontainer/process_linux.go
|
|
||||||
+++ b/libcontainer/process_linux.go
|
|
||||||
@@ -272,6 +272,20 @@ func (p *initProcess) start() error {
|
|
||||||
p.process.ops = nil
|
|
||||||
return newSystemErrorWithCause(err, "starting init process command")
|
|
||||||
}
|
|
||||||
+ if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
|
||||||
+ return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
|
||||||
+ }
|
|
||||||
+ if err := p.execSetns(); err != nil {
|
|
||||||
+ return newSystemErrorWithCause(err, "running exec setns process for init")
|
|
||||||
+ }
|
|
||||||
+ // Save the standard descriptor names before the container process
|
|
||||||
+ // can potentially move them (e.g., via dup2()). If we don't do this now,
|
|
||||||
+ // we won't know at checkpoint time which file descriptor to look up.
|
|
||||||
+ fds, err := getPipeFds(p.pid())
|
|
||||||
+ if err != nil {
|
|
||||||
+ return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
|
||||||
+ }
|
|
||||||
+ p.setExternalDescriptors(fds)
|
|
||||||
// Do this before syncing with child so that no children can escape the
|
|
||||||
// cgroup. We don't need to worry about not doing this and not being root
|
|
||||||
// because we'd be using the rootless cgroup manager in that case.
|
|
||||||
@@ -292,23 +306,6 @@ func (p *initProcess) start() error {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
-
|
|
||||||
- if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
|
||||||
- return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- if err := p.execSetns(); err != nil {
|
|
||||||
- return newSystemErrorWithCause(err, "running exec setns process for init")
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- // Save the standard descriptor names before the container process
|
|
||||||
- // can potentially move them (e.g., via dup2()). If we don't do this now,
|
|
||||||
- // we won't know at checkpoint time which file descriptor to look up.
|
|
||||||
- fds, err := getPipeFds(p.pid())
|
|
||||||
- if err != nil {
|
|
||||||
- return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
|
||||||
- }
|
|
||||||
- p.setExternalDescriptors(fds)
|
|
||||||
if err := p.createNetworkInterfaces(); err != nil {
|
|
||||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
|
||||||
}
|
|
||||||
--
|
|
||||||
2.14.3
|
|
||||||
|
|
@ -1,290 +0,0 @@
|
|||||||
From bf6405284aa3870a39b402309003633a1c230ed9 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Aleksa Sarai <asarai@suse.de>
|
|
||||||
Date: Wed, 9 Jan 2019 13:40:01 +1100
|
|
||||||
Subject: [PATCH 1/1] nsenter: clone /proc/self/exe to avoid exposing host
|
|
||||||
binary to container
|
|
||||||
|
|
||||||
There are quite a few circumstances where /proc/self/exe pointing to a
|
|
||||||
pretty important container binary is a _bad_ thing, so to avoid this we
|
|
||||||
have to make a copy (preferably doing self-clean-up and not being
|
|
||||||
writeable).
|
|
||||||
|
|
||||||
As a hotfix we require memfd_create(2), but we can always extend this to
|
|
||||||
use a scratch MNT_DETACH overlayfs or tmpfs. The main downside to this
|
|
||||||
approach is no page-cache sharing for the runc binary (which overlayfs
|
|
||||||
would give us) but this is far less complicated.
|
|
||||||
|
|
||||||
This is only done during nsenter so that it happens transparently to the
|
|
||||||
Go code, and any libcontainer users benefit from it. This also makes
|
|
||||||
ExtraFiles and --preserve-fds handling trivial (because we don't need to
|
|
||||||
worry about it).
|
|
||||||
|
|
||||||
Fixes: CVE-2019-5736
|
|
||||||
Co-developed-by: Christian Brauner <christian.brauner@ubuntu.com>
|
|
||||||
Signed-off-by: Aleksa Sarai <asarai@suse.de>
|
|
||||||
Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
|
|
||||||
---
|
|
||||||
libcontainer/nsenter/cloned_binary.c | 221 +++++++++++++++++++++++++++
|
|
||||||
libcontainer/nsenter/nsexec.c | 11 ++
|
|
||||||
2 files changed, 232 insertions(+)
|
|
||||||
create mode 100644 libcontainer/nsenter/cloned_binary.c
|
|
||||||
|
|
||||||
diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000..d9f6093a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/libcontainer/nsenter/cloned_binary.c
|
|
||||||
@@ -0,0 +1,221 @@
|
|
||||||
+#define _GNU_SOURCE
|
|
||||||
+#include <unistd.h>
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+#include <stdbool.h>
|
|
||||||
+#include <string.h>
|
|
||||||
+#include <limits.h>
|
|
||||||
+#include <fcntl.h>
|
|
||||||
+#include <errno.h>
|
|
||||||
+
|
|
||||||
+#include <sys/types.h>
|
|
||||||
+#include <sys/stat.h>
|
|
||||||
+#include <sys/vfs.h>
|
|
||||||
+#include <sys/mman.h>
|
|
||||||
+#include <sys/sendfile.h>
|
|
||||||
+#include <sys/syscall.h>
|
|
||||||
+
|
|
||||||
+#include <linux/magic.h>
|
|
||||||
+#include <linux/memfd.h>
|
|
||||||
+
|
|
||||||
+/* Use our own wrapper for memfd_create. */
|
|
||||||
+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
|
|
||||||
+# define SYS_memfd_create __NR_memfd_create
|
|
||||||
+#endif
|
|
||||||
+#ifndef SYS_memfd_create
|
|
||||||
+# error "memfd_create(2) syscall not supported by this glibc version"
|
|
||||||
+#endif
|
|
||||||
+int memfd_create(const char *name, unsigned int flags)
|
|
||||||
+{
|
|
||||||
+ return syscall(SYS_memfd_create, name, flags);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/* This comes directly from <linux/fcntl.h>. */
|
|
||||||
+#ifndef F_LINUX_SPECIFIC_BASE
|
|
||||||
+# define F_LINUX_SPECIFIC_BASE 1024
|
|
||||||
+#endif
|
|
||||||
+#ifndef F_ADD_SEALS
|
|
||||||
+# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
|
|
||||||
+# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
|
|
||||||
+#endif
|
|
||||||
+#ifndef F_SEAL_SEAL
|
|
||||||
+# define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
|
||||||
+# define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
|
||||||
+# define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
|
||||||
+# define F_SEAL_WRITE 0x0008 /* prevent writes */
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+#define OUR_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
|
|
||||||
+#define OUR_MEMFD_SEALS \
|
|
||||||
+ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
|
|
||||||
+
|
|
||||||
+static void *must_realloc(void *ptr, size_t size)
|
|
||||||
+{
|
|
||||||
+ void *old = ptr;
|
|
||||||
+ do {
|
|
||||||
+ ptr = realloc(old, size);
|
|
||||||
+ } while(!ptr);
|
|
||||||
+ return ptr;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Verify whether we are currently in a self-cloned program (namely, is
|
|
||||||
+ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
|
|
||||||
+ * for shmem files), and we want to be sure it's actually sealed.
|
|
||||||
+ */
|
|
||||||
+static int is_self_cloned(void)
|
|
||||||
+{
|
|
||||||
+ int fd, seals;
|
|
||||||
+
|
|
||||||
+ fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
|
||||||
+ if (fd < 0)
|
|
||||||
+ return -ENOTRECOVERABLE;
|
|
||||||
+
|
|
||||||
+ seals = fcntl(fd, F_GET_SEALS);
|
|
||||||
+ close(fd);
|
|
||||||
+ return seals == OUR_MEMFD_SEALS;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Basic wrapper around mmap(2) that gives you the file length so you can
|
|
||||||
+ * safely treat it as an ordinary buffer. Only gives you read access.
|
|
||||||
+ */
|
|
||||||
+static char *read_file(char *path, size_t *length)
|
|
||||||
+{
|
|
||||||
+ int fd;
|
|
||||||
+ char buf[4096], *copy = NULL;
|
|
||||||
+
|
|
||||||
+ if (!length)
|
|
||||||
+ return NULL;
|
|
||||||
+
|
|
||||||
+ fd = open(path, O_RDONLY | O_CLOEXEC);
|
|
||||||
+ if (fd < 0)
|
|
||||||
+ return NULL;
|
|
||||||
+
|
|
||||||
+ *length = 0;
|
|
||||||
+ for (;;) {
|
|
||||||
+ int n;
|
|
||||||
+
|
|
||||||
+ n = read(fd, buf, sizeof(buf));
|
|
||||||
+ if (n < 0)
|
|
||||||
+ goto error;
|
|
||||||
+ if (!n)
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ copy = must_realloc(copy, (*length + n) * sizeof(*copy));
|
|
||||||
+ memcpy(copy + *length, buf, n);
|
|
||||||
+ *length += n;
|
|
||||||
+ }
|
|
||||||
+ close(fd);
|
|
||||||
+ return copy;
|
|
||||||
+
|
|
||||||
+error:
|
|
||||||
+ close(fd);
|
|
||||||
+ free(copy);
|
|
||||||
+ return NULL;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * A poor-man's version of "xargs -0". Basically parses a given block of
|
|
||||||
+ * NUL-delimited data, within the given length and adds a pointer to each entry
|
|
||||||
+ * to the array of pointers.
|
|
||||||
+ */
|
|
||||||
+static int parse_xargs(char *data, int data_length, char ***output)
|
|
||||||
+{
|
|
||||||
+ int num = 0;
|
|
||||||
+ char *cur = data;
|
|
||||||
+
|
|
||||||
+ if (!data || *output != NULL)
|
|
||||||
+ return -1;
|
|
||||||
+
|
|
||||||
+ while (cur < data + data_length) {
|
|
||||||
+ num++;
|
|
||||||
+ *output = must_realloc(*output, (num + 1) * sizeof(**output));
|
|
||||||
+ (*output)[num - 1] = cur;
|
|
||||||
+ cur += strlen(cur) + 1;
|
|
||||||
+ }
|
|
||||||
+ (*output)[num] = NULL;
|
|
||||||
+ return num;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ.
|
|
||||||
+ * This is necessary because we are running in a context where we don't have a
|
|
||||||
+ * main() that we can just get the arguments from.
|
|
||||||
+ */
|
|
||||||
+static int fetchve(char ***argv, char ***envp)
|
|
||||||
+{
|
|
||||||
+ char *cmdline = NULL, *environ = NULL;
|
|
||||||
+ size_t cmdline_size, environ_size;
|
|
||||||
+
|
|
||||||
+ cmdline = read_file("/proc/self/cmdline", &cmdline_size);
|
|
||||||
+ if (!cmdline)
|
|
||||||
+ goto error;
|
|
||||||
+ environ = read_file("/proc/self/environ", &environ_size);
|
|
||||||
+ if (!environ)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
|
|
||||||
+ goto error;
|
|
||||||
+ if (parse_xargs(environ, environ_size, envp) <= 0)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+error:
|
|
||||||
+ free(environ);
|
|
||||||
+ free(cmdline);
|
|
||||||
+ return -EINVAL;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#define SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
|
|
||||||
+static int clone_binary(void)
|
|
||||||
+{
|
|
||||||
+ int binfd, memfd, err;
|
|
||||||
+ ssize_t sent = 0;
|
|
||||||
+
|
|
||||||
+ memfd = memfd_create(OUR_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
|
|
||||||
+ if (memfd < 0)
|
|
||||||
+ return -ENOTRECOVERABLE;
|
|
||||||
+
|
|
||||||
+ binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
|
|
||||||
+ if (binfd < 0)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ sent = sendfile(memfd, binfd, NULL, SENDFILE_MAX);
|
|
||||||
+ close(binfd);
|
|
||||||
+ if (sent < 0)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ err = fcntl(memfd, F_ADD_SEALS, OUR_MEMFD_SEALS);
|
|
||||||
+ if (err < 0)
|
|
||||||
+ goto error;
|
|
||||||
+
|
|
||||||
+ return memfd;
|
|
||||||
+
|
|
||||||
+error:
|
|
||||||
+ close(memfd);
|
|
||||||
+ return -EIO;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int ensure_cloned_binary(void)
|
|
||||||
+{
|
|
||||||
+ int execfd;
|
|
||||||
+ char **argv = NULL, **envp = NULL;
|
|
||||||
+
|
|
||||||
+ /* Check that we're not self-cloned, and if we are then bail. */
|
|
||||||
+ int cloned = is_self_cloned();
|
|
||||||
+ if (cloned > 0 || cloned == -ENOTRECOVERABLE)
|
|
||||||
+ return cloned;
|
|
||||||
+
|
|
||||||
+ if (fetchve(&argv, &envp) < 0)
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ execfd = clone_binary();
|
|
||||||
+ if (execfd < 0)
|
|
||||||
+ return -EIO;
|
|
||||||
+
|
|
||||||
+ fexecve(execfd, argv, envp);
|
|
||||||
+ return -ENOEXEC;
|
|
||||||
+}
|
|
||||||
diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
|
|
||||||
index cb224314..784fd9b0 100644
|
|
||||||
--- a/libcontainer/nsenter/nsexec.c
|
|
||||||
+++ b/libcontainer/nsenter/nsexec.c
|
|
||||||
@@ -528,6 +528,9 @@ void join_namespaces(char *nslist)
|
|
||||||
free(namespaces);
|
|
||||||
}
|
|
||||||
|
|
||||||
+/* Defined in cloned_binary.c. */
|
|
||||||
+int ensure_cloned_binary(void);
|
|
||||||
+
|
|
||||||
void nsexec(void)
|
|
||||||
{
|
|
||||||
int pipenum;
|
|
||||||
@@ -543,6 +546,14 @@ void nsexec(void)
|
|
||||||
if (pipenum == -1)
|
|
||||||
return;
|
|
||||||
|
|
||||||
+ /*
|
|
||||||
+ * We need to re-exec if we are not in a cloned binary. This is necessary
|
|
||||||
+ * to ensure that containers won't be able to access the host binary
|
|
||||||
+ * through /proc/self/exe. See CVE-2019-5736.
|
|
||||||
+ */
|
|
||||||
+ if (ensure_cloned_binary() < 0)
|
|
||||||
+ bail("could not ensure we are a cloned binary");
|
|
||||||
+
|
|
||||||
/* Parse all of the netlink configuration. */
|
|
||||||
nl_parse(pipenum, &config);
|
|
||||||
|
|
||||||
--
|
|
||||||
2.20.1
|
|
||||||
|
|
540
SOURCES/0001-rootfs-add-mount-destination-validation.patch
Normal file
540
SOURCES/0001-rootfs-add-mount-destination-validation.patch
Normal file
@ -0,0 +1,540 @@
|
|||||||
|
From 2dd156b190c02476191fc2522f9b0e0a1a098608 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||||
|
Date: Mon, 17 May 2021 16:11:35 -0700
|
||||||
|
Subject: [PATCH] rootfs: add mount destination validation
|
||||||
|
|
||||||
|
This is a manual backport of fix for CVE-2021-30465 to runc-1.0.0-rc10
|
||||||
|
(aka -rc90), upstream commit 84c14b43fa703db7 by Aleksa Sarai.
|
||||||
|
|
||||||
|
Original description follows.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
Because the target of a mount is inside a container (which may be a
|
||||||
|
volume that is shared with another container), there exists a race
|
||||||
|
condition where the target of the mount may change to a path containing
|
||||||
|
a symlink after we have sanitised the path -- resulting in us
|
||||||
|
inadvertently mounting the path outside of the container.
|
||||||
|
|
||||||
|
This is not immediately useful because we are in a mount namespace with
|
||||||
|
MS_SLAVE mount propagation applied to "/", so we cannot mount on top of
|
||||||
|
host paths in the host namespace. However, if any subsequent mountpoints
|
||||||
|
in the configuration use a subdirectory of that host path as a source,
|
||||||
|
those subsequent mounts will use an attacker-controlled source path
|
||||||
|
(resolved within the host rootfs) -- allowing the bind-mounting of "/"
|
||||||
|
into the container.
|
||||||
|
|
||||||
|
While arguably configuration issues like this are not entirely within
|
||||||
|
runc's threat model, within the context of Kubernetes (and possibly
|
||||||
|
other container managers that provide semi-arbitrary container creation
|
||||||
|
privileges to untrusted users) this is a legitimate issue. Since we
|
||||||
|
cannot block mounting from the host into the container, we need to block
|
||||||
|
the first stage of this attack (mounting onto a path outside the
|
||||||
|
container).
|
||||||
|
|
||||||
|
The long-term plan to solve this would be to migrate to libpathrs, but
|
||||||
|
as a stop-gap we implement libpathrs-like path verification through
|
||||||
|
readlink(/proc/self/fd/$n) and then do mount operations through the
|
||||||
|
procfd once it's been verified to be inside the container. The target
|
||||||
|
could move after we've checked it, but if it is inside the container
|
||||||
|
then we can assume that it is safe for the same reason that libpathrs
|
||||||
|
operations would be safe.
|
||||||
|
|
||||||
|
A slight wrinkle is the "copyup" functionality we provide for tmpfs,
|
||||||
|
which is the only case where we want to do a mount on the host
|
||||||
|
filesystem. To facilitate this, I split out the copy-up functionality
|
||||||
|
entirely so that the logic isn't interspersed with the regular tmpfs
|
||||||
|
logic. In addition, all dependencies on m.Destination being overwritten
|
||||||
|
have been removed since that pattern was just begging to be a source of
|
||||||
|
more mount-target bugs (we do still have to modify m.Destination for
|
||||||
|
tmpfs-copyup but we only do it temporarily).
|
||||||
|
|
||||||
|
Fixes: CVE-2021-30465
|
||||||
|
Reported-by: Etienne Champetier <champetier.etienne@gmail.com>
|
||||||
|
Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com>
|
||||||
|
Reviewed-by: Samuel Karp <skarp@amazon.com>
|
||||||
|
Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
|
||||||
|
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
|
||||||
|
|
||||||
|
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
|
||||||
|
---
|
||||||
|
libcontainer/rootfs_linux.go | 225 ++++++++++++++++---------------
|
||||||
|
libcontainer/utils/utils.go | 54 ++++++++
|
||||||
|
libcontainer/utils/utils_test.go | 35 +++++
|
||||||
|
3 files changed, 204 insertions(+), 110 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
||||||
|
index 106c4c2b..fe9afe48 100644
|
||||||
|
--- a/libcontainer/rootfs_linux.go
|
||||||
|
+++ b/libcontainer/rootfs_linux.go
|
||||||
|
@@ -19,8 +19,9 @@ import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/mount"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/system"
|
||||||
|
- libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
+ "github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
"github.com/opencontainers/selinux/go-selinux/label"
|
||||||
|
+ "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
@@ -30,7 +31,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||||
|
// needsSetupDev returns true if /dev needs to be set up.
|
||||||
|
func needsSetupDev(config *configs.Config) bool {
|
||||||
|
for _, m := range config.Mounts {
|
||||||
|
- if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||||
|
+ if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -131,7 +132,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||||
|
func finalizeRootfs(config *configs.Config) (err error) {
|
||||||
|
// remount dev as ro if specified
|
||||||
|
for _, m := range config.Mounts {
|
||||||
|
- if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||||
|
+ if utils.CleanPath(m.Destination) == "/dev" {
|
||||||
|
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||||
|
if err := remountReadonly(m); err != nil {
|
||||||
|
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
||||||
|
@@ -200,8 +201,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
|
||||||
|
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- // update the mount with the correct dest after symlinks are resolved.
|
||||||
|
- m.Destination = dest
|
||||||
|
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
@@ -238,18 +237,21 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||||
|
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- flags := defaultMountFlags
|
||||||
|
- if m.Flags&unix.MS_RDONLY != 0 {
|
||||||
|
- flags = flags | unix.MS_RDONLY
|
||||||
|
- }
|
||||||
|
- cgroupmount := &configs.Mount{
|
||||||
|
- Source: "cgroup",
|
||||||
|
- Device: "cgroup",
|
||||||
|
- Destination: subsystemPath,
|
||||||
|
- Flags: flags,
|
||||||
|
- Data: filepath.Base(subsystemPath),
|
||||||
|
- }
|
||||||
|
- if err := mountNewCgroup(cgroupmount); err != nil {
|
||||||
|
+ if err := utils.WithProcfd(rootfs, b.Destination, func(procfd string) error {
|
||||||
|
+ flags := defaultMountFlags
|
||||||
|
+ if m.Flags&unix.MS_RDONLY != 0 {
|
||||||
|
+ flags = flags | unix.MS_RDONLY
|
||||||
|
+ }
|
||||||
|
+ var (
|
||||||
|
+ source = "cgroup"
|
||||||
|
+ data = filepath.Base(subsystemPath)
|
||||||
|
+ )
|
||||||
|
+ if data == "systemd" {
|
||||||
|
+ data = cgroups.CgroupNamePrefix + data
|
||||||
|
+ source = "systemd"
|
||||||
|
+ }
|
||||||
|
+ return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
|
||||||
|
+ }); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
@@ -279,22 +281,67 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||||
|
if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||||
|
- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||||
|
- if err == unix.EPERM || err == unix.EBUSY {
|
||||||
|
- return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||||
|
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
|
+ if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
|
||||||
|
+ // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
|
||||||
|
+ if err == unix.EPERM || err == unix.EBUSY {
|
||||||
|
+ return unix.Mount("/sys/fs/cgroup", procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
|
||||||
|
+ }
|
||||||
|
+ return err
|
||||||
|
}
|
||||||
|
+ return nil
|
||||||
|
+ })
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
|
||||||
|
+ // Set up a scratch dir for the tmpfs on the host.
|
||||||
|
+ tmpdir, err := prepareTmp("/tmp")
|
||||||
|
+ if err != nil {
|
||||||
|
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||||
|
+ }
|
||||||
|
+ defer cleanupTmp(tmpdir)
|
||||||
|
+ tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
|
||||||
|
+ if err != nil {
|
||||||
|
+ return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||||
|
+ }
|
||||||
|
+ defer os.RemoveAll(tmpDir)
|
||||||
|
+
|
||||||
|
+ // Configure the *host* tmpdir as if it's the container mount. We change
|
||||||
|
+ // m.Destination since we are going to mount *on the host*.
|
||||||
|
+ oldDest := m.Destination
|
||||||
|
+ m.Destination = tmpDir
|
||||||
|
+ err = mountPropagate(m, "/", mountLabel)
|
||||||
|
+ m.Destination = oldDest
|
||||||
|
+ if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- return nil
|
||||||
|
+ defer func() {
|
||||||
|
+ if Err != nil {
|
||||||
|
+ if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
|
||||||
|
+ logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }()
|
||||||
|
+
|
||||||
|
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
|
||||||
|
+ // Copy the container data to the host tmpdir. We append "/" to force
|
||||||
|
+ // CopyDirectory to resolve the symlink rather than trying to copy the
|
||||||
|
+ // symlink itself.
|
||||||
|
+ if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
|
||||||
|
+ return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %v", m.Destination, procfd, tmpDir, err)
|
||||||
|
+ }
|
||||||
|
+ // Now move the mount into the container.
|
||||||
|
+ if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
|
||||||
|
+ return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %v", tmpDir, procfd, m.Destination, err)
|
||||||
|
+ }
|
||||||
|
+ return nil
|
||||||
|
+ })
|
||||||
|
}
|
||||||
|
|
||||||
|
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
|
||||||
|
- var (
|
||||||
|
- dest = m.Destination
|
||||||
|
- )
|
||||||
|
- if !strings.HasPrefix(dest, rootfs) {
|
||||||
|
- dest = filepath.Join(rootfs, dest)
|
||||||
|
+ dest, err := securejoin.SecureJoin(rootfs, m.Destination)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch m.Device {
|
||||||
|
@@ -329,46 +376,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
case "tmpfs":
|
||||||
|
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||||
|
- tmpDir := ""
|
||||||
|
stat, err := os.Stat(dest)
|
||||||
|
if err != nil {
|
||||||
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- if copyUp {
|
||||||
|
- tmpdir, err := prepareTmp("/tmp")
|
||||||
|
- if err != nil {
|
||||||
|
- return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
|
||||||
|
- }
|
||||||
|
- defer cleanupTmp(tmpdir)
|
||||||
|
- tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
|
||||||
|
- if err != nil {
|
||||||
|
- return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||||
|
- }
|
||||||
|
- defer os.RemoveAll(tmpDir)
|
||||||
|
- m.Destination = tmpDir
|
||||||
|
+
|
||||||
|
+ if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
|
||||||
|
+ err = doTmpfsCopyUp(m, rootfs, mountLabel)
|
||||||
|
+ } else {
|
||||||
|
+ err = mountPropagate(m, rootfs, mountLabel)
|
||||||
|
}
|
||||||
|
- if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||||
|
+ if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- if copyUp {
|
||||||
|
- if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
||||||
|
- errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
||||||
|
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||||
|
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||||
|
- }
|
||||||
|
- return errMsg
|
||||||
|
- }
|
||||||
|
- if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
||||||
|
- errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
||||||
|
- if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||||
|
- return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||||
|
- }
|
||||||
|
- return errMsg
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
if stat != nil {
|
||||||
|
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||||
|
return err
|
||||||
|
@@ -424,19 +446,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
- // ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||||
|
- // any previous mounts can invalidate the next mount's destination.
|
||||||
|
- // this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||||
|
- // evil stuff to try to escape the container's rootfs.
|
||||||
|
- var err error
|
||||||
|
- if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||||
|
- return err
|
||||||
|
- }
|
||||||
|
if err := checkProcMount(rootfs, dest, m.Source); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
- // update the mount with the correct dest after symlinks are resolved.
|
||||||
|
- m.Destination = dest
|
||||||
|
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
@@ -611,7 +623,7 @@ func createDevices(config *configs.Config) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
-func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||||
|
+func bindMountDeviceNode(rootfs, dest string, node *configs.Device) error {
|
||||||
|
f, err := os.Create(dest)
|
||||||
|
if err != nil && !os.IsExist(err) {
|
||||||
|
return err
|
||||||
|
@@ -619,24 +631,29 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||||
|
if f != nil {
|
||||||
|
f.Close()
|
||||||
|
}
|
||||||
|
- return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
||||||
|
+ return utils.WithProcfd(rootfs, dest, func(procfd string) error {
|
||||||
|
+ return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
|
||||||
|
+ })
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates the device node in the rootfs of the container.
|
||||||
|
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||||
|
- dest := filepath.Join(rootfs, node.Path)
|
||||||
|
+ dest, err := securejoin.SecureJoin(rootfs, node.Path)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if bind {
|
||||||
|
- return bindMountDeviceNode(dest, node)
|
||||||
|
+ return bindMountDeviceNode(rootfs, dest, node)
|
||||||
|
}
|
||||||
|
if err := mknodDevice(dest, node); err != nil {
|
||||||
|
if os.IsExist(err) {
|
||||||
|
return nil
|
||||||
|
} else if os.IsPermission(err) {
|
||||||
|
- return bindMountDeviceNode(dest, node)
|
||||||
|
+ return bindMountDeviceNode(rootfs, dest, node)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
@@ -955,55 +972,43 @@ func writeSystemProperty(key, value string) error {
|
||||||
|
}
|
||||||
|
|
||||||
|
func remount(m *configs.Mount, rootfs string) error {
|
||||||
|
- var (
|
||||||
|
- dest = m.Destination
|
||||||
|
- )
|
||||||
|
- if !strings.HasPrefix(dest, rootfs) {
|
||||||
|
- dest = filepath.Join(rootfs, dest)
|
||||||
|
- }
|
||||||
|
- return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||||
|
+ return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
|
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
|
||||||
|
+ })
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the mount operation followed by additional mounts required to take care
|
||||||
|
-// of propagation flags.
|
||||||
|
+// of propagation flags. This will always be scoped inside the container rootfs.
|
||||||
|
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||||
|
var (
|
||||||
|
- dest = m.Destination
|
||||||
|
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||||
|
flags = m.Flags
|
||||||
|
)
|
||||||
|
- if libcontainerUtils.CleanPath(dest) == "/dev" {
|
||||||
|
+ if utils.CleanPath(m.Destination) == "/dev" {
|
||||||
|
flags &= ^unix.MS_RDONLY
|
||||||
|
}
|
||||||
|
|
||||||
|
- copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||||
|
- if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
||||||
|
- dest = filepath.Join(rootfs, dest)
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||||
|
- return err
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- for _, pflag := range m.PropagationFlags {
|
||||||
|
- if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||||
|
- return err
|
||||||
|
+ // Because the destination is inside a container path which might be
|
||||||
|
+ // mutating underneath us, we verify that we are actually going to mount
|
||||||
|
+ // inside the container with WithProcfd() -- mounting through a procfd
|
||||||
|
+ // mounts on the target.
|
||||||
|
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
|
+ return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
|
||||||
|
+ }); err != nil {
|
||||||
|
+ return fmt.Errorf("mount through procfd: %v", err)
|
||||||
|
+ }
|
||||||
|
+ // We have to apply mount propagation flags in a separate WithProcfd() call
|
||||||
|
+ // because the previous call invalidates the passed procfd -- the mount
|
||||||
|
+ // target needs to be re-opened.
|
||||||
|
+ if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
|
||||||
|
+ for _, pflag := range m.PropagationFlags {
|
||||||
|
+ if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
- }
|
||||||
|
- return nil
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-func mountNewCgroup(m *configs.Mount) error {
|
||||||
|
- var (
|
||||||
|
- data = m.Data
|
||||||
|
- source = m.Source
|
||||||
|
- )
|
||||||
|
- if data == "systemd" {
|
||||||
|
- data = cgroups.CgroupNamePrefix + data
|
||||||
|
- source = "systemd"
|
||||||
|
- }
|
||||||
|
- if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
|
||||||
|
- return err
|
||||||
|
+ return nil
|
||||||
|
+ }); err != nil {
|
||||||
|
+ return fmt.Errorf("change mount propagation through procfd: %v", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
|
||||||
|
index 40ccfaa1..c1418ef9 100644
|
||||||
|
--- a/libcontainer/utils/utils.go
|
||||||
|
+++ b/libcontainer/utils/utils.go
|
||||||
|
@@ -2,12 +2,15 @@ package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
+ "fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
+ "strconv"
|
||||||
|
"strings"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
+ securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
@@ -73,6 +76,57 @@ func CleanPath(path string) string {
|
||||||
|
return filepath.Clean(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
+// stripRoot returns the passed path, stripping the root path if it was
|
||||||
|
+// (lexically) inside it. Note that both passed paths will always be treated
|
||||||
|
+// as absolute, and the returned path will also always be absolute. In
|
||||||
|
+// addition, the paths are cleaned before stripping the root.
|
||||||
|
+func stripRoot(root, path string) string {
|
||||||
|
+ // Make the paths clean and absolute.
|
||||||
|
+ root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||||
|
+ switch {
|
||||||
|
+ case path == root:
|
||||||
|
+ path = "/"
|
||||||
|
+ case root == "/":
|
||||||
|
+ // do nothing
|
||||||
|
+ case strings.HasPrefix(path, root+"/"):
|
||||||
|
+ path = strings.TrimPrefix(path, root+"/")
|
||||||
|
+ }
|
||||||
|
+ return CleanPath("/" + path)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||||
|
+// corresponding to the unsafePath resolved within the root. Before passing the
|
||||||
|
+// fd, this path is verified to have been inside the root -- so operating on it
|
||||||
|
+// through the passed fdpath should be safe. Do not access this path through
|
||||||
|
+// the original path strings, and do not attempt to use the pathname outside of
|
||||||
|
+// the passed closure (the file handle will be freed once the closure returns).
|
||||||
|
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||||
|
+ // Remove the root then forcefully resolve inside the root.
|
||||||
|
+ unsafePath = stripRoot(root, unsafePath)
|
||||||
|
+ path, err := securejoin.SecureJoin(root, unsafePath)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return fmt.Errorf("resolving path inside rootfs failed: %v", err)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Open the target path.
|
||||||
|
+ fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||||
|
+ if err != nil {
|
||||||
|
+ return fmt.Errorf("open o_path procfd: %v", err)
|
||||||
|
+ }
|
||||||
|
+ defer fh.Close()
|
||||||
|
+
|
||||||
|
+ // Double-check the path is the one we expected.
|
||||||
|
+ procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||||
|
+ if realpath, err := os.Readlink(procfd); err != nil {
|
||||||
|
+ return fmt.Errorf("procfd verification failed: %v", err)
|
||||||
|
+ } else if realpath != path {
|
||||||
|
+ return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // Run the closure.
|
||||||
|
+ return fn(procfd)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// SearchLabels searches a list of key-value pairs for the provided key and
|
||||||
|
// returns the corresponding value. The pairs must be separated with '='.
|
||||||
|
func SearchLabels(labels []string, query string) string {
|
||||||
|
diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go
|
||||||
|
index 395eedcf..5b80cac6 100644
|
||||||
|
--- a/libcontainer/utils/utils_test.go
|
||||||
|
+++ b/libcontainer/utils/utils_test.go
|
||||||
|
@@ -140,3 +140,38 @@ func TestCleanPath(t *testing.T) {
|
||||||
|
t.Errorf("expected to receive '/foo' and received %s", path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+func TestStripRoot(t *testing.T) {
|
||||||
|
+ for _, test := range []struct {
|
||||||
|
+ root, path, out string
|
||||||
|
+ }{
|
||||||
|
+ // Works with multiple components.
|
||||||
|
+ {"/a/b", "/a/b/c", "/c"},
|
||||||
|
+ {"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"},
|
||||||
|
+ // '/' must be a no-op.
|
||||||
|
+ {"/", "/a/b/c", "/a/b/c"},
|
||||||
|
+ // Must be the correct order.
|
||||||
|
+ {"/a/b", "/a/c/b", "/a/c/b"},
|
||||||
|
+ // Must be at start.
|
||||||
|
+ {"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"},
|
||||||
|
+ // Must be a lexical parent.
|
||||||
|
+ {"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"},
|
||||||
|
+ // Must only strip the root once.
|
||||||
|
+ {"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"},
|
||||||
|
+ // Deal with .. in a fairly sane way.
|
||||||
|
+ {"/foo/bar", "/foo/bar/../baz", "/foo/baz"},
|
||||||
|
+ {"/foo/bar", "../../../../../../foo/bar/baz", "/baz"},
|
||||||
|
+ {"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"},
|
||||||
|
+ {"/foo/bar/../baz", "/foo/baz/bar", "/bar"},
|
||||||
|
+ {"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"},
|
||||||
|
+ // All paths are made absolute before stripping.
|
||||||
|
+ {"foo/bar", "/foo/bar/baz/bee", "/baz/bee"},
|
||||||
|
+ {"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
|
||||||
|
+ {"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
|
||||||
|
+ } {
|
||||||
|
+ got := stripRoot(test.root, test.path)
|
||||||
|
+ if got != test.out {
|
||||||
|
+ t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
@ -1,168 +1,178 @@
|
|||||||
From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001
|
From 3d99c51e1b38a440804a55c9f314f62cc50b8902 Mon Sep 17 00:00:00 2001
|
||||||
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
Date: Fri, 25 May 2018 18:04:06 +0200
|
Date: Fri, 25 May 2018 18:04:06 +0200
|
||||||
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
|
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
|
||||||
|
|
||||||
if NOTIFY_SOCKET is used, do not block the main runc process waiting
|
if NOTIFY_SOCKET is used, do not block the main runc process waiting
|
||||||
for events on the notify socket. Change the logic to create a new
|
for events on the notify socket. Bind mount the parent directory of
|
||||||
process that monitors exclusively the notify socket until an event is
|
the notify socket, so that "start" can create the socket and it is
|
||||||
received.
|
still accessible from the container.
|
||||||
|
|
||||||
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
---
|
---
|
||||||
init.go | 12 +++++++
|
notify_socket.go | 112 ++++++++++++++++++++++++++++++++++-------------
|
||||||
notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++---------
|
signals.go | 4 +-
|
||||||
signals.go | 5 +--
|
start.go | 13 +++++-
|
||||||
3 files changed, 99 insertions(+), 19 deletions(-)
|
utils_linux.go | 12 ++++-
|
||||||
|
4 files changed, 105 insertions(+), 36 deletions(-)
|
||||||
|
|
||||||
diff --git a/init.go b/init.go
|
|
||||||
index c8f453192..6a3d9e91c 100644
|
|
||||||
--- a/init.go
|
|
||||||
+++ b/init.go
|
|
||||||
@@ -20,6 +20,18 @@ var initCommand = cli.Command{
|
|
||||||
Name: "init",
|
|
||||||
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
|
|
||||||
Action: func(context *cli.Context) error {
|
|
||||||
+ // If NOTIFY_SOCKET is used create a new process that stays around
|
|
||||||
+ // so to not block "runc start". It will automatically exit when the
|
|
||||||
+ // container notifies that it is ready, or when the container is deleted
|
|
||||||
+ if os.Getenv("_NOTIFY_SOCKET_FD") != "" {
|
|
||||||
+ fd := os.Getenv("_NOTIFY_SOCKET_FD")
|
|
||||||
+ pid := os.Getenv("_NOTIFY_SOCKET_PID")
|
|
||||||
+ hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST")
|
|
||||||
+ notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH")
|
|
||||||
+ notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath)
|
|
||||||
+ os.Exit(0)
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
factory, _ := libcontainer.New("")
|
|
||||||
if err := factory.StartInitialization(); err != nil {
|
|
||||||
// as the error is sent back to the parent there is no need to log
|
|
||||||
diff --git a/notify_socket.go b/notify_socket.go
|
diff --git a/notify_socket.go b/notify_socket.go
|
||||||
index cd6c0a989..e04e9d660 100644
|
index e7453c62..d961453a 100644
|
||||||
--- a/notify_socket.go
|
--- a/notify_socket.go
|
||||||
+++ b/notify_socket.go
|
+++ b/notify_socket.go
|
||||||
@@ -6,10 +6,13 @@ import (
|
@@ -7,11 +7,13 @@ import (
|
||||||
"bytes"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
+ "os"
|
"os"
|
||||||
+ "os/exec"
|
+ "path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
+ "strconv"
|
+ "strconv"
|
||||||
+ "time"
|
+ "time"
|
||||||
|
|
||||||
|
+ "github.com/opencontainers/runc/libcontainer"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
-
|
-
|
||||||
"github.com/sirupsen/logrus"
|
- "github.com/sirupsen/logrus"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
)
|
)
|
||||||
@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error {
|
|
||||||
|
@@ -27,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
|
||||||
|
}
|
||||||
|
|
||||||
|
root := filepath.Join(context.GlobalString("root"), id)
|
||||||
|
- path := filepath.Join(root, "notify.sock")
|
||||||
|
+ socketPath := filepath.Join(root, "notify", "notify.sock")
|
||||||
|
|
||||||
|
notifySocket := ¬ifySocket{
|
||||||
|
socket: nil,
|
||||||
|
host: notifySocketHost,
|
||||||
|
- socketPath: path,
|
||||||
|
+ socketPath: socketPath,
|
||||||
|
}
|
||||||
|
|
||||||
|
return notifySocket
|
||||||
|
@@ -44,13 +46,19 @@ func (s *notifySocket) Close() error {
|
||||||
|
|
||||||
|
// If systemd is supporting sd_notify protocol, this function will add support
|
||||||
|
// for sd_notify protocol from within the container.
|
||||||
|
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
|
||||||
|
- mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
|
||||||
|
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
|
||||||
|
+ pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
|
||||||
|
+ mount := specs.Mount{
|
||||||
|
+ Destination: path.Dir(pathInContainer),
|
||||||
|
+ Source: path.Dir(s.socketPath),
|
||||||
|
+ Options: []string{"bind", "nosuid", "noexec", "nodev", "ro"},
|
||||||
|
+ }
|
||||||
|
spec.Mounts = append(spec.Mounts, mount)
|
||||||
|
- spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
|
||||||
|
+ spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
|
||||||
|
+ return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
-func (s *notifySocket) setupSocket() error {
|
||||||
|
+func (s *notifySocket) bindSocket() error {
|
||||||
|
addr := net.UnixAddr{
|
||||||
|
Name: s.socketPath,
|
||||||
|
Net: "unixgram",
|
||||||
|
@@ -71,45 +79,89 @@ func (s *notifySocket) setupSocket() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
+func (notifySocket *notifySocket) notifyNewPid(pid int) {
|
-// pid1 must be set only with -d, as it is used to set the new process as the main process
|
||||||
+ notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
|
-// for the service in systemd
|
||||||
+ client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
-func (s *notifySocket) run(pid1 int) {
|
||||||
+ if err != nil {
|
- buf := make([]byte, 512)
|
||||||
+ return
|
- notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
|
||||||
+ }
|
+func (s *notifySocket) setupSocketDirectory() error {
|
||||||
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid)
|
+ return os.Mkdir(path.Dir(s.socketPath), 0755)
|
||||||
+ client.Write([]byte(newPid))
|
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
// pid1 must be set only with -d, as it is used to set the new process as the main process
|
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
|
||||||
// for the service in systemd
|
+ notifySocket := newNotifySocket(context, notifySocketHost, id)
|
||||||
func (notifySocket *notifySocket) run(pid1 int) {
|
+ if notifySocket == nil {
|
||||||
- buf := make([]byte, 512)
|
+ return nil, nil
|
||||||
- notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
|
+ }
|
||||||
- client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
+
|
||||||
+ file, err := notifySocket.socket.File()
|
+ if err := notifySocket.bindSocket(); err != nil {
|
||||||
|
+ return nil, err
|
||||||
|
+ }
|
||||||
|
+ return notifySocket, nil
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
|
||||||
|
+ s, err := container.State()
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ return n.run(s.InitProcessPid)
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+func (n *notifySocket) run(pid1 int) error {
|
||||||
|
+ if n.socket == nil {
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+ notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
|
||||||
|
client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Error(err)
|
- logrus.Error(err)
|
||||||
return
|
- return
|
||||||
|
+ return err
|
||||||
}
|
}
|
||||||
- for {
|
- for {
|
||||||
- r, err := notifySocket.socket.Read(buf)
|
- r, err := s.socket.Read(buf)
|
||||||
- if err != nil {
|
- if err != nil {
|
||||||
- break
|
- break
|
||||||
+ defer file.Close()
|
|
||||||
+ defer notifySocket.socket.Close()
|
|
||||||
+
|
+
|
||||||
+ cmd := exec.Command("/proc/self/exe", "init")
|
+ ticker := time.NewTicker(time.Millisecond * 100)
|
||||||
+ cmd.ExtraFiles = []*os.File{file}
|
+ defer ticker.Stop()
|
||||||
+ cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3",
|
|
||||||
+ fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1),
|
|
||||||
+ fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host),
|
|
||||||
+ fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath))
|
|
||||||
+
|
|
||||||
+ if err := cmd.Start(); err != nil {
|
|
||||||
+ logrus.Fatal(err)
|
|
||||||
+ }
|
|
||||||
+ notifySocket.notifyNewPid(cmd.Process.Pid)
|
|
||||||
+ cmd.Process.Release()
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) {
|
|
||||||
+ intFd, err := strconv.Atoi(envFd)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+ pid1, err := strconv.Atoi(envPid)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ file := os.NewFile(uintptr(intFd), "unixgram")
|
|
||||||
+ defer file.Close()
|
|
||||||
+
|
+
|
||||||
+ fileChan := make(chan []byte)
|
+ fileChan := make(chan []byte)
|
||||||
+ exitChan := make(chan bool)
|
|
||||||
+
|
|
||||||
+ go func() {
|
+ go func() {
|
||||||
+ for {
|
+ for {
|
||||||
+ buf := make([]byte, 512)
|
+ buf := make([]byte, 512)
|
||||||
+ r, err := file.Read(buf)
|
+ r, err := n.socket.Read(buf)
|
||||||
+ if err != nil {
|
+ if err != nil {
|
||||||
+ return
|
+ return
|
||||||
+ }
|
+ }
|
||||||
+ fileChan <- buf[0:r]
|
+ got := buf[0:r]
|
||||||
|
+ if !bytes.HasPrefix(got, []byte("READY=")) {
|
||||||
|
+ continue
|
||||||
|
+ }
|
||||||
|
+ fileChan <- got
|
||||||
|
+ return
|
||||||
}
|
}
|
||||||
- var out bytes.Buffer
|
- var out bytes.Buffer
|
||||||
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
- for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
|
||||||
- if bytes.HasPrefix(line, []byte("READY=")) {
|
- if bytes.HasPrefix(line, []byte("READY=")) {
|
||||||
+ }()
|
+ }()
|
||||||
+ go func() {
|
|
||||||
+ for {
|
|
||||||
+ if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) {
|
|
||||||
+ exitChan <- true
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+ time.Sleep(time.Second)
|
|
||||||
+ }
|
|
||||||
+ }()
|
|
||||||
+
|
|
||||||
+ notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"}
|
|
||||||
+ client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return
|
|
||||||
+ }
|
|
||||||
+
|
+
|
||||||
+ for {
|
+ for {
|
||||||
+ select {
|
+ select {
|
||||||
+ case <-exitChan:
|
+ case <-ticker.C:
|
||||||
+ return
|
+ _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
|
||||||
|
+ if err != nil {
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
+ case b := <-fileChan:
|
+ case b := <-fileChan:
|
||||||
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
|
+ for _, line := range bytes.Split(b, []byte{'\n'}) {
|
||||||
+ if !bytes.HasPrefix(line, []byte("READY=")) {
|
|
||||||
+ continue
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ var out bytes.Buffer
|
+ var out bytes.Buffer
|
||||||
_, err = out.Write(line)
|
_, err = out.Write(line)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
- return
|
||||||
@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) {
|
+ return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = out.Write([]byte{'\n'})
|
||||||
|
if err != nil {
|
||||||
|
- return
|
||||||
|
+ return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = client.Write(out.Bytes())
|
||||||
|
if err != nil {
|
||||||
|
- return
|
||||||
|
+ return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// now we can inform systemd to use pid1 as the pid to monitor
|
// now we can inform systemd to use pid1 as the pid to monitor
|
||||||
@ -170,25 +180,26 @@ index cd6c0a989..e04e9d660 100644
|
|||||||
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
- newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||||
- client.Write([]byte(newPid))
|
- client.Write([]byte(newPid))
|
||||||
- }
|
- }
|
||||||
|
- return
|
||||||
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
+ newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
|
||||||
+ client.Write([]byte(newPid))
|
+ client.Write([]byte(newPid))
|
||||||
return
|
+ return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
diff --git a/signals.go b/signals.go
|
diff --git a/signals.go b/signals.go
|
||||||
index 1811de837..d0988cb39 100644
|
index b67f65a0..dd25e094 100644
|
||||||
--- a/signals.go
|
--- a/signals.go
|
||||||
+++ b/signals.go
|
+++ b/signals.go
|
||||||
@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||||
h.notifySocket.run(pid1)
|
h.notifySocket.run(pid1)
|
||||||
return 0, nil
|
return 0, nil
|
||||||
} else {
|
|
||||||
- go h.notifySocket.run(0)
|
|
||||||
+ h.notifySocket.run(os.Getpid())
|
|
||||||
}
|
}
|
||||||
|
+ h.notifySocket.run(os.Getpid())
|
||||||
|
go h.notifySocket.run(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
|
||||||
// status because we must ensure that any of the go specific process
|
// status because we must ensure that any of the go specific process
|
||||||
// fun such as flushing pipes are complete before we return.
|
// fun such as flushing pipes are complete before we return.
|
||||||
process.Wait()
|
process.Wait()
|
||||||
@ -198,3 +209,70 @@ index 1811de837..d0988cb39 100644
|
|||||||
return e.status, nil
|
return e.status, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
diff --git a/start.go b/start.go
|
||||||
|
index 2bb698b2..3a1769a4 100644
|
||||||
|
--- a/start.go
|
||||||
|
+++ b/start.go
|
||||||
|
@@ -3,6 +3,7 @@ package main
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
+ "os"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
|
"github.com/urfave/cli"
|
||||||
|
@@ -31,7 +32,17 @@ your host.`,
|
||||||
|
}
|
||||||
|
switch status {
|
||||||
|
case libcontainer.Created:
|
||||||
|
- return container.Exec()
|
||||||
|
+ notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
|
||||||
|
+ if err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ if err := container.Exec(); err != nil {
|
||||||
|
+ return err
|
||||||
|
+ }
|
||||||
|
+ if notifySocket != nil {
|
||||||
|
+ return notifySocket.waitForContainer(container)
|
||||||
|
+ }
|
||||||
|
+ return nil
|
||||||
|
case libcontainer.Stopped:
|
||||||
|
return errors.New("cannot start a container that has stopped")
|
||||||
|
case libcontainer.Running:
|
||||||
|
diff --git a/utils_linux.go b/utils_linux.go
|
||||||
|
index 984e6b0f..46c26246 100644
|
||||||
|
--- a/utils_linux.go
|
||||||
|
+++ b/utils_linux.go
|
||||||
|
@@ -408,7 +408,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
||||||
|
|
||||||
|
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
|
||||||
|
if notifySocket != nil {
|
||||||
|
- notifySocket.setupSpec(context, spec)
|
||||||
|
+ if err := notifySocket.setupSpec(context, spec); err != nil {
|
||||||
|
+ return -1, err
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
container, err := createContainer(context, id, spec)
|
||||||
|
@@ -417,10 +419,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
|
||||||
|
}
|
||||||
|
|
||||||
|
if notifySocket != nil {
|
||||||
|
- err := notifySocket.setupSocket()
|
||||||
|
+ err := notifySocket.setupSocketDirectory()
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
+ if action == CT_ACT_RUN {
|
||||||
|
+ err := notifySocket.bindSocket()
|
||||||
|
+ if err != nil {
|
||||||
|
+ return -1, err
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
// Support on-demand socket activation by passing file descriptors into the container init process.
|
||||||
|
--
|
||||||
|
2.21.0
|
||||||
|
|
||||||
|
@ -1 +0,0 @@
|
|||||||
fs.may_detach_mounts=1
|
|
@ -1,61 +0,0 @@
|
|||||||
diff --git a/list.go b/list.go
|
|
||||||
index 0313d8c..328798b 100644
|
|
||||||
--- a/list.go
|
|
||||||
+++ b/list.go
|
|
||||||
@@ -50,7 +50,7 @@ var listCommand = cli.Command{
|
|
||||||
ArgsUsage: `
|
|
||||||
|
|
||||||
Where the given root is specified via the global option "--root"
|
|
||||||
-(default: "/run/runc").
|
|
||||||
+(default: "/run/runc-ctrs").
|
|
||||||
|
|
||||||
EXAMPLE 1:
|
|
||||||
To list containers created via the default "--root":
|
|
||||||
diff --git a/main.go b/main.go
|
|
||||||
index 278399a..0f49fce 100644
|
|
||||||
--- a/main.go
|
|
||||||
+++ b/main.go
|
|
||||||
@@ -62,7 +62,7 @@ func main() {
|
|
||||||
v = append(v, fmt.Sprintf("spec: %s", specs.Version))
|
|
||||||
app.Version = strings.Join(v, "\n")
|
|
||||||
|
|
||||||
- root := "/run/runc"
|
|
||||||
+ root := "/run/runc-ctrs"
|
|
||||||
rootless, err := isRootless(nil)
|
|
||||||
if err != nil {
|
|
||||||
fatal(err)
|
|
||||||
@@ -70,7 +70,7 @@ func main() {
|
|
||||||
if rootless {
|
|
||||||
runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
|
|
||||||
if runtimeDir != "" {
|
|
||||||
- root = runtimeDir + "/runc"
|
|
||||||
+ root = runtimeDir + "/runc-ctrs"
|
|
||||||
// According to the XDG specification, we need to set anything in
|
|
||||||
// XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get
|
|
||||||
// auto-pruned.
|
|
||||||
diff --git a/man/runc-list.8.md b/man/runc-list.8.md
|
|
||||||
index f737424..107220e 100644
|
|
||||||
--- a/man/runc-list.8.md
|
|
||||||
+++ b/man/runc-list.8.md
|
|
||||||
@@ -6,7 +6,7 @@
|
|
||||||
|
|
||||||
# EXAMPLE
|
|
||||||
Where the given root is specified via the global option "--root"
|
|
||||||
-(default: "/run/runc").
|
|
||||||
+(default: "/run/runc-ctrs").
|
|
||||||
|
|
||||||
To list containers created via the default "--root":
|
|
||||||
# runc list
|
|
||||||
diff --git a/man/runc.8.md b/man/runc.8.md
|
|
||||||
index 6d0ddff..337bc73 100644
|
|
||||||
--- a/man/runc.8.md
|
|
||||||
+++ b/man/runc.8.md
|
|
||||||
@@ -51,7 +51,7 @@ value for "bundle" is the current directory.
|
|
||||||
--debug enable debug output for logging
|
|
||||||
--log value set the log file path where internal debug information is written (default: "/dev/null")
|
|
||||||
--log-format value set the format used by logs ('text' (default), or 'json') (default: "text")
|
|
||||||
- --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers)
|
|
||||||
+ --root value root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc-ctrs" or $XDG_RUNTIME_DIR/runc-ctrs for rootless containers)
|
|
||||||
--criu value path to the criu binary used for checkpoint and restore (default: "criu")
|
|
||||||
--systemd-cgroup enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234"
|
|
||||||
--rootless value enable rootless mode ('true', 'false', or 'auto') (default: "auto")
|
|
@ -1,72 +0,0 @@
|
|||||||
From 28a697cce3e4f905dca700eda81d681a30eef9cd Mon Sep 17 00:00:00 2001
|
|
||||||
From: Giuseppe Scrivano <gscrivan@redhat.com>
|
|
||||||
Date: Fri, 11 Jan 2019 21:53:45 +0100
|
|
||||||
Subject: [PATCH] rootfs: umount all procfs and sysfs with --no-pivot
|
|
||||||
|
|
||||||
When creating a new user namespace, the kernel doesn't allow to mount
|
|
||||||
a new procfs or sysfs file system if there is not already one instance
|
|
||||||
fully visible in the current mount namespace.
|
|
||||||
|
|
||||||
When using --no-pivot we were effectively inhibiting this protection
|
|
||||||
from the kernel, as /proc and /sys from the host are still present in
|
|
||||||
the container mount namespace.
|
|
||||||
|
|
||||||
A container without full access to /proc could then create a new user
|
|
||||||
namespace, and from there able to mount a fully visible /proc, bypassing
|
|
||||||
the limitations in the container.
|
|
||||||
|
|
||||||
A simple reproducer for this issue is:
|
|
||||||
|
|
||||||
unshare -mrfp sh -c "mount -t proc none /proc && echo c > /proc/sysrq-trigger"
|
|
||||||
|
|
||||||
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
|
|
||||||
---
|
|
||||||
libcontainer/rootfs_linux.go | 35 +++++++++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 35 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
|
|
||||||
index e7c2f8ada..6bd6da74a 100644
|
|
||||||
--- a/libcontainer/rootfs_linux.go
|
|
||||||
+++ b/libcontainer/rootfs_linux.go
|
|
||||||
@@ -748,6 +748,41 @@ func pivotRoot(rootfs string) error {
|
|
||||||
}
|
|
||||||
|
|
||||||
func msMoveRoot(rootfs string) error {
|
|
||||||
+ mountinfos, err := mount.GetMounts()
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ absRootfs, err := filepath.Abs(rootfs)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ for _, info := range mountinfos {
|
|
||||||
+ p, err := filepath.Abs(info.Mountpoint)
|
|
||||||
+ if err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+ // Umount every sysfs and proc file system, except those under the container rootfs
|
|
||||||
+ if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
|
|
||||||
+ continue
|
|
||||||
+ }
|
|
||||||
+ // Be sure umount events are not propagated to the host.
|
|
||||||
+ if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+ if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
|
|
||||||
+ if err != unix.EINVAL && err != unix.EPERM {
|
|
||||||
+ return err
|
|
||||||
+ } else {
|
|
||||||
+ // If we do not have privileges for umounting (e.g. rootless), then
|
|
||||||
+ // cover the path.
|
|
||||||
+ if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
|
|
||||||
+ return err
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
@ -12,41 +12,34 @@
|
|||||||
%if 0%{?rhel} > 7 && ! 0%{?fedora}
|
%if 0%{?rhel} > 7 && ! 0%{?fedora}
|
||||||
%define gobuild(o:) \
|
%define gobuild(o:) \
|
||||||
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
|
go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
|
||||||
%endif # distro
|
%endif
|
||||||
|
|
||||||
%global provider github
|
%global provider github
|
||||||
%global provider_tld com
|
%global provider_tld com
|
||||||
%global project opencontainers
|
%global project opencontainers
|
||||||
%global repo runc
|
%global repo runc
|
||||||
# https://github.com/opencontainers/runc
|
# https://github.com/opencontainers/runc
|
||||||
%global provider_prefix %{provider}.%{provider_tld}/%{project}/%{repo}
|
%global import_path %{provider}.%{provider_tld}/%{project}/%{repo}
|
||||||
%global import_path %{provider_prefix}
|
%global git0 https://%{import_path}
|
||||||
%global git0 https://github.com/opencontainers/runc
|
%global commit0 dc9208a3303feef5b3839f4323d9beb36df0a9dd
|
||||||
%global commit0 2abd837c8c25b0102ac4ce14f17bc0bc7ddffba7
|
|
||||||
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
%global shortcommit0 %(c=%{commit0}; echo ${c:0:7})
|
||||||
|
|
||||||
Name: %{repo}
|
Name: %{repo}
|
||||||
Version: 1.0.0
|
Version: 1.0.0
|
||||||
Release: 56.rc5.dev.git%{shortcommit0}%{?dist}
|
Release: 66.rc10%{?dist}
|
||||||
Summary: CLI for running Open Containers
|
Summary: CLI for running Open Containers
|
||||||
ExcludeArch: %{ix86}
|
ExcludeArch: %{ix86}
|
||||||
License: ASL 2.0
|
License: ASL 2.0
|
||||||
URL: http//%{provider_prefix}
|
URL: %{git0}
|
||||||
Source0: %{git0}/archive/%{commit0}/%{repo}-%{shortcommit0}.tar.gz
|
Source0: %{git0}/archive/%{commit0}/%{name}-%{shortcommit0}.tar.gz
|
||||||
Source1: 99-containers.conf
|
Patch0: 1807.patch
|
||||||
Patch0: change-default-root.patch
|
Patch1: 0001-rootfs-add-mount-destination-validation.patch
|
||||||
Patch1: 0001-Revert-Apply-cgroups-earlier.patch
|
BuildRequires: golang >= 1.12.12-4
|
||||||
Patch2: 1807.patch
|
|
||||||
Patch3: 0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
|
|
||||||
Patch4: pivot-root.patch
|
|
||||||
Requires: criu
|
|
||||||
Requires(pre): container-selinux >= 2:2.2-2
|
|
||||||
|
|
||||||
# If go_compiler is not set to 1, there is no virtual provide. Use golang instead.
|
|
||||||
BuildRequires: %{?go_compiler:compiler(go-compiler)}%{!?go_compiler:golang} >= 1.6.2
|
|
||||||
BuildRequires: git
|
BuildRequires: git
|
||||||
BuildRequires: go-md2man
|
BuildRequires: go-md2man
|
||||||
BuildRequires: libseccomp-devel
|
BuildRequires: libseccomp-devel
|
||||||
|
Requires: criu
|
||||||
|
Requires(pre): container-selinux >= 2:2.2-2
|
||||||
|
|
||||||
%description
|
%description
|
||||||
The runc command can be used to start containers which are packaged
|
The runc command can be used to start containers which are packaged
|
||||||
@ -65,6 +58,7 @@ pushd GOPATH
|
|||||||
popd
|
popd
|
||||||
|
|
||||||
pushd GOPATH/src/%{import_path}
|
pushd GOPATH/src/%{import_path}
|
||||||
|
export GO111MODULE=off
|
||||||
export GOPATH=%{gopath}:$(pwd)/GOPATH
|
export GOPATH=%{gopath}:$(pwd)/GOPATH
|
||||||
export BUILDTAGS="selinux seccomp"
|
export BUILDTAGS="selinux seccomp"
|
||||||
%gobuild -o %{name} %{import_path}
|
%gobuild -o %{name} %{import_path}
|
||||||
@ -97,12 +91,45 @@ install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash
|
|||||||
%{_datadir}/bash-completion/completions/%{name}
|
%{_datadir}/bash-completion/completions/%{name}
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
* Thu Nov 28 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
|
* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-66.rc10
|
||||||
- rebuild because of CVE-2019-9512 and CVE-2019-9514
|
- set GO111MODULE=off to fix build
|
||||||
- Resolves: #1766328, #1766300
|
- Related: #1955651
|
||||||
|
|
||||||
|
* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-65.rc10
|
||||||
|
- fix CVE-2021-30465
|
||||||
|
- Resolves: #1955651
|
||||||
|
|
||||||
|
* Thu Feb 13 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-64.rc10
|
||||||
|
- address CVE-2019-19921 by updating to rc10
|
||||||
|
- Resolves: #1801888
|
||||||
|
|
||||||
|
* Wed Dec 11 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-63.rc9
|
||||||
|
- use no_openssl in BUILDTAGS (no vendored crypto in runc)
|
||||||
|
- Related: RHELPLAN-25139
|
||||||
|
|
||||||
|
* Mon Dec 09 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-62.rc9
|
||||||
|
- be sure to use golang >= 1.12.12-4
|
||||||
|
- Related: RHELPLAN-25139
|
||||||
|
|
||||||
|
* Thu Nov 21 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-61.rc9
|
||||||
|
- update to runc 1.0.0-rc9 release
|
||||||
|
- amend golang deps
|
||||||
|
- fixes CVE-2019-16884
|
||||||
|
- Resolves: #1759651
|
||||||
|
|
||||||
|
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-60.rc8
|
||||||
|
- Resolves: #1721247 - enable fips mode
|
||||||
|
|
||||||
|
* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-59.rc8
|
||||||
|
- Resolves: #1720654 - rebase to v1.0.0-rc8
|
||||||
|
|
||||||
|
* Thu Apr 11 2019 Eduardo Santiago <santiago@redhat.com> - 1.0.0-57.rc5.dev.git2abd837
|
||||||
|
- Resolves: #1693424 - podman rootless: cannot specify gid= mount options
|
||||||
|
|
||||||
|
* Wed Feb 27 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
|
||||||
|
- change-default-root patch not needed as there's no docker on rhel8
|
||||||
|
|
||||||
* Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
|
* Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
|
||||||
- Resolves: #1665770 - rootfs: umount all procfs and sysfs with --no-pivot
|
|
||||||
- Resolves: CVE-2019-5736
|
- Resolves: CVE-2019-5736
|
||||||
|
|
||||||
* Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837
|
* Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837
|
||||||
|
Loading…
Reference in New Issue
Block a user