10 changed files with 789 additions and 630 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1 @@
-SOURCES/runc-2abd837.tar.gz
+SOURCES/runc-dc9208a.tar.gz
--- a/.runc.metadata
+++ b/.runc.metadata
@ -1 +1 @@
-cf7119a838db2963e7af6ecdba90a2cc95ec0d56 SOURCES/runc-2abd837.tar.gz
+32859590dea35b77eed012c388d97fc12fdfdb93 SOURCES/runc-dc9208a.tar.gz
--- a/SOURCES/0001-Revert-Apply-cgroups-earlier.patch
+++ b/SOURCES/0001-Revert-Apply-cgroups-earlier.patch
@ -1,62 +0,0 @@
-From dfb3496c174377b860b62872ce6af951364cc3ac Mon Sep 17 00:00:00 2001
-From: Lokesh Mandvekar <lsm5@fedoraproject.org>
-Date: Tue, 12 Dec 2017 13:22:42 +0530
-Subject: [PATCH] Revert "Apply cgroups earlier"
-
-This reverts commit 7062c7556b71188abc18d7516441ff4b03fbc1fc.
---
- libcontainer/process_linux.go | 31 ++++++++++++++-----------------
- 1 file changed, 14 insertions(+), 17 deletions(-)
-
-diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go
-index 149b1126..b8a395af 100644
--- a/libcontainer/process_linux.go
-+++ b/libcontainer/process_linux.go
-@@ -272,6 +272,20 @@ func (p *initProcess) start() error {
- 		p.process.ops = nil
- 		return newSystemErrorWithCause(err, "starting init process command")
- 	}
-+	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
-+		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
-+	}
-+	if err := p.execSetns(); err != nil {
-+		return newSystemErrorWithCause(err, "running exec setns process for init")
-+	}
-+	// Save the standard descriptor names before the container process
-+	// can potentially move them (e.g., via dup2()).  If we don't do this now,
-+	// we won't know at checkpoint time which file descriptor to look up.
-+	fds, err := getPipeFds(p.pid())
-+	if err != nil {
-+		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
-+	}
-+	p.setExternalDescriptors(fds)
- 	// Do this before syncing with child so that no children can escape the
- 	// cgroup. We don't need to worry about not doing this and not being root
- 	// because we'd be using the rootless cgroup manager in that case.
-@@ -292,23 +306,6 @@ func (p *initProcess) start() error {
- 			}
- 		}
- 	}()
-
-	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
-		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
-	}
-
-	if err := p.execSetns(); err != nil {
-		return newSystemErrorWithCause(err, "running exec setns process for init")
-	}
-
-	// Save the standard descriptor names before the container process
-	// can potentially move them (e.g., via dup2()).  If we don't do this now,
-	// we won't know at checkpoint time which file descriptor to look up.
-	fds, err := getPipeFds(p.pid())
-	if err != nil {
-		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
-	}
-	p.setExternalDescriptors(fds)
- 	if err := p.createNetworkInterfaces(); err != nil {
- 		return newSystemErrorWithCause(err, "creating network interfaces")
- 	}
-- 
-2.14.3
-
--- a/SOURCES/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
+++ b/SOURCES/0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
@ -1,290 +0,0 @@
-From bf6405284aa3870a39b402309003633a1c230ed9 Mon Sep 17 00:00:00 2001
-From: Aleksa Sarai <asarai@suse.de>
-Date: Wed, 9 Jan 2019 13:40:01 +1100
-Subject: [PATCH 1/1] nsenter: clone /proc/self/exe to avoid exposing host
- binary to container
-
-There are quite a few circumstances where /proc/self/exe pointing to a
-pretty important container binary is a _bad_ thing, so to avoid this we
-have to make a copy (preferably doing self-clean-up and not being
-writeable).
-
-As a hotfix we require memfd_create(2), but we can always extend this to
-use a scratch MNT_DETACH overlayfs or tmpfs. The main downside to this
-approach is no page-cache sharing for the runc binary (which overlayfs
-would give us) but this is far less complicated.
-
-This is only done during nsenter so that it happens transparently to the
-Go code, and any libcontainer users benefit from it. This also makes
-ExtraFiles and --preserve-fds handling trivial (because we don't need to
-worry about it).
-
-Fixes: CVE-2019-5736
-Co-developed-by: Christian Brauner <christian.brauner@ubuntu.com>
-Signed-off-by: Aleksa Sarai <asarai@suse.de>
-Signed-off-by: Mrunal Patel <mrunalp@gmail.com>
---
- libcontainer/nsenter/cloned_binary.c | 221 +++++++++++++++++++++++++++
- libcontainer/nsenter/nsexec.c        |  11 ++
- 2 files changed, 232 insertions(+)
- create mode 100644 libcontainer/nsenter/cloned_binary.c
-
-diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c
-new file mode 100644
-index 00000000..d9f6093a
--- /dev/null
-+++ b/libcontainer/nsenter/cloned_binary.c
-@@ -0,0 +1,221 @@
-+#define _GNU_SOURCE
-+#include <unistd.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <stdbool.h>
-+#include <string.h>
-+#include <limits.h>
-+#include <fcntl.h>
-+#include <errno.h>
-+
-+#include <sys/types.h>
-+#include <sys/stat.h>
-+#include <sys/vfs.h>
-+#include <sys/mman.h>
-+#include <sys/sendfile.h>
-+#include <sys/syscall.h>
-+
-+#include <linux/magic.h>
-+#include <linux/memfd.h>
-+
-+/* Use our own wrapper for memfd_create. */
-+#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
-+#  define SYS_memfd_create __NR_memfd_create
-+#endif
-+#ifndef SYS_memfd_create
-+#  error "memfd_create(2) syscall not supported by this glibc version"
-+#endif
-+int memfd_create(const char *name, unsigned int flags)
-+{
-+	return syscall(SYS_memfd_create, name, flags);
-+}
-+
-+/* This comes directly from <linux/fcntl.h>. */
-+#ifndef F_LINUX_SPECIFIC_BASE
-+#  define F_LINUX_SPECIFIC_BASE 1024
-+#endif
-+#ifndef F_ADD_SEALS
-+#  define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-+#  define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-+#endif
-+#ifndef F_SEAL_SEAL
-+#  define F_SEAL_SEAL   0x0001	/* prevent further seals from being set */
-+#  define F_SEAL_SHRINK 0x0002	/* prevent file from shrinking */
-+#  define F_SEAL_GROW   0x0004	/* prevent file from growing */
-+#  define F_SEAL_WRITE  0x0008	/* prevent writes */
-+#endif
-+
-+
-+#define OUR_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
-+#define OUR_MEMFD_SEALS \
-+	(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)
-+
-+static void *must_realloc(void *ptr, size_t size)
-+{
-+	void *old = ptr;
-+	do {
-+		ptr = realloc(old, size);
-+	} while(!ptr);
-+	return ptr;
-+}
-+
-+/*
-+ * Verify whether we are currently in a self-cloned program (namely, is
-+ * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
-+ * for shmem files), and we want to be sure it's actually sealed.
-+ */
-+static int is_self_cloned(void)
-+{
-+	int fd, seals;
-+
-+	fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
-+	if (fd < 0)
-+		return -ENOTRECOVERABLE;
-+
-+	seals = fcntl(fd, F_GET_SEALS);
-+	close(fd);
-+	return seals == OUR_MEMFD_SEALS;
-+}
-+
-+/*
-+ * Basic wrapper around mmap(2) that gives you the file length so you can
-+ * safely treat it as an ordinary buffer. Only gives you read access.
-+ */
-+static char *read_file(char *path, size_t *length)
-+{
-+	int fd;
-+	char buf[4096], *copy = NULL;
-+
-+	if (!length)
-+		return NULL;
-+
-+	fd = open(path, O_RDONLY | O_CLOEXEC);
-+	if (fd < 0)
-+		return NULL;
-+
-+	*length = 0;
-+	for (;;) {
-+		int n;
-+
-+		n = read(fd, buf, sizeof(buf));
-+		if (n < 0)
-+			goto error;
-+		if (!n)
-+			break;
-+
-+		copy = must_realloc(copy, (*length + n) * sizeof(*copy));
-+		memcpy(copy + *length, buf, n);
-+		*length += n;
-+	}
-+	close(fd);
-+	return copy;
-+
-+error:
-+	close(fd);
-+	free(copy);
-+	return NULL;
-+}
-+
-+/*
-+ * A poor-man's version of "xargs -0". Basically parses a given block of
-+ * NUL-delimited data, within the given length and adds a pointer to each entry
-+ * to the array of pointers.
-+ */
-+static int parse_xargs(char *data, int data_length, char ***output)
-+{
-+	int num = 0;
-+	char *cur = data;
-+
-+	if (!data || *output != NULL)
-+		return -1;
-+
-+	while (cur < data + data_length) {
-+		num++;
-+		*output = must_realloc(*output, (num + 1) * sizeof(**output));
-+		(*output)[num - 1] = cur;
-+		cur += strlen(cur) + 1;
-+	}
-+	(*output)[num] = NULL;
-+	return num;
-+}
-+
-+/*
-+ * "Parse" out argv and envp from /proc/self/cmdline and /proc/self/environ.
-+ * This is necessary because we are running in a context where we don't have a
-+ * main() that we can just get the arguments from.
-+ */
-+static int fetchve(char ***argv, char ***envp)
-+{
-+	char *cmdline = NULL, *environ = NULL;
-+	size_t cmdline_size, environ_size;
-+
-+	cmdline = read_file("/proc/self/cmdline", &cmdline_size);
-+	if (!cmdline)
-+		goto error;
-+	environ = read_file("/proc/self/environ", &environ_size);
-+	if (!environ)
-+		goto error;
-+
-+	if (parse_xargs(cmdline, cmdline_size, argv) <= 0)
-+		goto error;
-+	if (parse_xargs(environ, environ_size, envp) <= 0)
-+		goto error;
-+
-+	return 0;
-+
-+error:
-+	free(environ);
-+	free(cmdline);
-+	return -EINVAL;
-+}
-+
-+#define SENDFILE_MAX 0x7FFFF000 /* sendfile(2) is limited to 2GB. */
-+static int clone_binary(void)
-+{
-+	int binfd, memfd, err;
-+	ssize_t sent = 0;
-+
-+	memfd = memfd_create(OUR_MEMFD_COMMENT, MFD_CLOEXEC | MFD_ALLOW_SEALING);
-+	if (memfd < 0)
-+		return -ENOTRECOVERABLE;
-+
-+	binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
-+	if (binfd < 0)
-+		goto error;
-+
-+	sent = sendfile(memfd, binfd, NULL, SENDFILE_MAX);
-+	close(binfd);
-+	if (sent < 0)
-+		goto error;
-+
-+	err = fcntl(memfd, F_ADD_SEALS, OUR_MEMFD_SEALS);
-+	if (err < 0)
-+		goto error;
-+
-+	return memfd;
-+
-+error:
-+	close(memfd);
-+	return -EIO;
-+}
-+
-+int ensure_cloned_binary(void)
-+{
-+	int execfd;
-+	char **argv = NULL, **envp = NULL;
-+
-+	/* Check that we're not self-cloned, and if we are then bail. */
-+	int cloned = is_self_cloned();
-+	if (cloned > 0 || cloned == -ENOTRECOVERABLE)
-+		return cloned;
-+
-+	if (fetchve(&argv, &envp) < 0)
-+		return -EINVAL;
-+
-+	execfd = clone_binary();
-+	if (execfd < 0)
-+		return -EIO;
-+
-+	fexecve(execfd, argv, envp);
-+	return -ENOEXEC;
-+}
-diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c
-index cb224314..784fd9b0 100644
--- a/libcontainer/nsenter/nsexec.c
-+++ b/libcontainer/nsenter/nsexec.c
-@@ -528,6 +528,9 @@ void join_namespaces(char *nslist)
- 	free(namespaces);
- }
- 
-+/* Defined in cloned_binary.c. */
-+int ensure_cloned_binary(void);
-+
- void nsexec(void)
- {
- 	int pipenum;
-@@ -543,6 +546,14 @@ void nsexec(void)
- 	if (pipenum == -1)
- 		return;
- 
-+	/*
-+	 * We need to re-exec if we are not in a cloned binary. This is necessary
-+	 * to ensure that containers won't be able to access the host binary
-+	 * through /proc/self/exe. See CVE-2019-5736.
-+	 */
-+	if (ensure_cloned_binary() < 0)
-+		bail("could not ensure we are a cloned binary");
-+
- 	/* Parse all of the netlink configuration. */
- 	nl_parse(pipenum, &config);
- 
-- 
-2.20.1
-
--- a/SOURCES/0001-rootfs-add-mount-destination-validation.patch
+++ b/SOURCES/0001-rootfs-add-mount-destination-validation.patch
@ -0,0 +1,540 @@
+From 2dd156b190c02476191fc2522f9b0e0a1a098608 Mon Sep 17 00:00:00 2001
+From: Kir Kolyshkin <kolyshkin@gmail.com>
+Date: Mon, 17 May 2021 16:11:35 -0700
+Subject: [PATCH] rootfs: add mount destination validation
+
+This is a manual backport of fix for CVE-2021-30465 to runc-1.0.0-rc10
+(aka -rc90), upstream commit 84c14b43fa703db7 by Aleksa Sarai.
+
+Original description follows.
+
+----
+
+Because the target of a mount is inside a container (which may be a
+volume that is shared with another container), there exists a race
+condition where the target of the mount may change to a path containing
+a symlink after we have sanitised the path -- resulting in us
+inadvertently mounting the path outside of the container.
+
+This is not immediately useful because we are in a mount namespace with
+MS_SLAVE mount propagation applied to "/", so we cannot mount on top of
+host paths in the host namespace. However, if any subsequent mountpoints
+in the configuration use a subdirectory of that host path as a source,
+those subsequent mounts will use an attacker-controlled source path
+(resolved within the host rootfs) -- allowing the bind-mounting of "/"
+into the container.
+
+While arguably configuration issues like this are not entirely within
+runc's threat model, within the context of Kubernetes (and possibly
+other container managers that provide semi-arbitrary container creation
+privileges to untrusted users) this is a legitimate issue. Since we
+cannot block mounting from the host into the container, we need to block
+the first stage of this attack (mounting onto a path outside the
+container).
+
+The long-term plan to solve this would be to migrate to libpathrs, but
+as a stop-gap we implement libpathrs-like path verification through
+readlink(/proc/self/fd/$n) and then do mount operations through the
+procfd once it's been verified to be inside the container. The target
+could move after we've checked it, but if it is inside the container
+then we can assume that it is safe for the same reason that libpathrs
+operations would be safe.
+
+A slight wrinkle is the "copyup" functionality we provide for tmpfs,
+which is the only case where we want to do a mount on the host
+filesystem. To facilitate this, I split out the copy-up functionality
+entirely so that the logic isn't interspersed with the regular tmpfs
+logic. In addition, all dependencies on m.Destination being overwritten
+have been removed since that pattern was just begging to be a source of
+more mount-target bugs (we do still have to modify m.Destination for
+tmpfs-copyup but we only do it temporarily).
+
+Fixes: CVE-2021-30465
+Reported-by: Etienne Champetier <champetier.etienne@gmail.com>
+Co-authored-by: Noah Meyerhans <nmeyerha@amazon.com>
+Reviewed-by: Samuel Karp <skarp@amazon.com>
+Reviewed-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+
+Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
+---
+ libcontainer/rootfs_linux.go     | 225 ++++++++++++++++---------------
+ libcontainer/utils/utils.go      |  54 ++++++++
+ libcontainer/utils/utils_test.go |  35 +++++
+ 3 files changed, 204 insertions(+), 110 deletions(-)
+
+diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
+index 106c4c2b..fe9afe48 100644
+--- a/libcontainer/rootfs_linux.go
+++ b/libcontainer/rootfs_linux.go
+@@ -19,8 +19,9 @@ import (
+ 	"github.com/opencontainers/runc/libcontainer/configs"
+ 	"github.com/opencontainers/runc/libcontainer/mount"
+ 	"github.com/opencontainers/runc/libcontainer/system"
+-	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/opencontainers/runc/libcontainer/utils"
+ 	"github.com/opencontainers/selinux/go-selinux/label"
+	"github.com/sirupsen/logrus"
+ 
+ 	"golang.org/x/sys/unix"
+ )
+@@ -30,7 +31,7 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
+ // needsSetupDev returns true if /dev needs to be set up.
+ func needsSetupDev(config *configs.Config) bool {
+ 	for _, m := range config.Mounts {
+-		if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
+		if m.Device == "bind" && utils.CleanPath(m.Destination) == "/dev" {
+ 			return false
+ 		}
+ 	}
+@@ -131,7 +132,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
+ func finalizeRootfs(config *configs.Config) (err error) {
+ 	// remount dev as ro if specified
+ 	for _, m := range config.Mounts {
+-		if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
+		if utils.CleanPath(m.Destination) == "/dev" {
+ 			if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
+ 				if err := remountReadonly(m); err != nil {
+ 					return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
+@@ -200,8 +201,6 @@ func prepareBindMount(m *configs.Mount, rootfs string) error {
+ 	if err := checkProcMount(rootfs, dest, m.Source); err != nil {
+ 		return err
+ 	}
+-	// update the mount with the correct dest after symlinks are resolved.
+-	m.Destination = dest
+ 	if err := createIfNotExists(dest, stat.IsDir()); err != nil {
+ 		return err
+ 	}
+@@ -238,18 +237,21 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
+ 			if err := os.MkdirAll(subsystemPath, 0755); err != nil {
+ 				return err
+ 			}
+-			flags := defaultMountFlags
+-			if m.Flags&unix.MS_RDONLY != 0 {
+-				flags = flags | unix.MS_RDONLY
+-			}
+-			cgroupmount := &configs.Mount{
+-				Source:      "cgroup",
+-				Device:      "cgroup",
+-				Destination: subsystemPath,
+-				Flags:       flags,
+-				Data:        filepath.Base(subsystemPath),
+-			}
+-			if err := mountNewCgroup(cgroupmount); err != nil {
+			if err := utils.WithProcfd(rootfs, b.Destination, func(procfd string) error {
+				flags := defaultMountFlags
+				if m.Flags&unix.MS_RDONLY != 0 {
+					flags = flags | unix.MS_RDONLY
+				}
+				var (
+					source = "cgroup"
+					data   = filepath.Base(subsystemPath)
+				)
+				if data == "systemd" {
+					data = cgroups.CgroupNamePrefix + data
+					source = "systemd"
+				}
+				return unix.Mount(source, procfd, "cgroup", uintptr(flags), data)
+			}); err != nil {
+ 				return err
+ 			}
+ 		} else {
+@@ -279,22 +281,67 @@ func mountCgroupV2(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
+ 	if err := os.MkdirAll(cgroupPath, 0755); err != nil {
+ 		return err
+ 	}
+-	if err := unix.Mount(m.Source, cgroupPath, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
+-		// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
+-		if err == unix.EPERM || err == unix.EBUSY {
+-			return unix.Mount("/sys/fs/cgroup", cgroupPath, "", uintptr(m.Flags)|unix.MS_BIND, "")
+	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+		if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil {
+			// when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158)
+			if err == unix.EPERM || err == unix.EBUSY {
+				return unix.Mount("/sys/fs/cgroup", procfd, "", uintptr(m.Flags)|unix.MS_BIND, "")
+			}
+			return err
+ 		}
+		return nil
+	})
+}
+
+func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
+	// Set up a scratch dir for the tmpfs on the host.
+	tmpdir, err := prepareTmp("/tmp")
+	if err != nil {
+		return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
+	}
+	defer cleanupTmp(tmpdir)
+	tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir")
+	if err != nil {
+		return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// Configure the *host* tmpdir as if it's the container mount. We change
+	// m.Destination since we are going to mount *on the host*.
+	oldDest := m.Destination
+	m.Destination = tmpDir
+	err = mountPropagate(m, "/", mountLabel)
+	m.Destination = oldDest
+	if err != nil {
+ 		return err
+ 	}
+-	return nil
+	defer func() {
+		if Err != nil {
+			if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil {
+				logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err)
+			}
+		}
+	}()
+
+	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) {
+		// Copy the container data to the host tmpdir. We append "/" to force
+		// CopyDirectory to resolve the symlink rather than trying to copy the
+		// symlink itself.
+		if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil {
+			return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %v", m.Destination, procfd, tmpDir, err)
+		}
+		// Now move the mount into the container.
+		if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil {
+			return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %v", tmpDir, procfd, m.Destination, err)
+		}
+		return nil
+	})
+ }
+ 
+ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
+-	var (
+-		dest = m.Destination
+-	)
+-	if !strings.HasPrefix(dest, rootfs) {
+-		dest = filepath.Join(rootfs, dest)
+	dest, err := securejoin.SecureJoin(rootfs, m.Destination)
+	if err != nil {
+		return err
+ 	}
+ 
+ 	switch m.Device {
+@@ -329,46 +376,21 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
+ 		}
+ 		return nil
+ 	case "tmpfs":
+-		copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
+-		tmpDir := ""
+ 		stat, err := os.Stat(dest)
+ 		if err != nil {
+ 			if err := os.MkdirAll(dest, 0755); err != nil {
+ 				return err
+ 			}
+ 		}
+-		if copyUp {
+-			tmpdir, err := prepareTmp("/tmp")
+-			if err != nil {
+-				return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir")
+-			}
+-			defer cleanupTmp(tmpdir)
+-			tmpDir, err = ioutil.TempDir(tmpdir, "runctmpdir")
+-			if err != nil {
+-				return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
+-			}
+-			defer os.RemoveAll(tmpDir)
+-			m.Destination = tmpDir
+
+		if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
+			err = doTmpfsCopyUp(m, rootfs, mountLabel)
+		} else {
+			err = mountPropagate(m, rootfs, mountLabel)
+ 		}
+-		if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+		if err != nil {
+ 			return err
+ 		}
+-		if copyUp {
+-			if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
+-				errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
+-				if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
+-					return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
+-				}
+-				return errMsg
+-			}
+-			if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
+-				errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
+-				if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
+-					return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
+-				}
+-				return errMsg
+-			}
+-		}
+ 		if stat != nil {
+ 			if err = os.Chmod(dest, stat.Mode()); err != nil {
+ 				return err
+@@ -424,19 +446,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
+ 			}
+ 		}
+ 	default:
+-		// ensure that the destination of the mount is resolved of symlinks at mount time because
+-		// any previous mounts can invalidate the next mount's destination.
+-		// this can happen when a user specifies mounts within other mounts to cause breakouts or other
+-		// evil stuff to try to escape the container's rootfs.
+-		var err error
+-		if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
+-			return err
+-		}
+ 		if err := checkProcMount(rootfs, dest, m.Source); err != nil {
+ 			return err
+ 		}
+-		// update the mount with the correct dest after symlinks are resolved.
+-		m.Destination = dest
+ 		if err := os.MkdirAll(dest, 0755); err != nil {
+ 			return err
+ 		}
+@@ -611,7 +623,7 @@ func createDevices(config *configs.Config) error {
+ 	return nil
+ }
+ 
+-func bindMountDeviceNode(dest string, node *configs.Device) error {
+func bindMountDeviceNode(rootfs, dest string, node *configs.Device) error {
+ 	f, err := os.Create(dest)
+ 	if err != nil && !os.IsExist(err) {
+ 		return err
+@@ -619,24 +631,29 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
+ 	if f != nil {
+ 		f.Close()
+ 	}
+-	return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
+	return utils.WithProcfd(rootfs, dest, func(procfd string) error {
+		return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "")
+	})
+ }
+ 
+ // Creates the device node in the rootfs of the container.
+ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
+-	dest := filepath.Join(rootfs, node.Path)
+	dest, err := securejoin.SecureJoin(rootfs, node.Path)
+	if err != nil {
+		return err
+	}
+ 	if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
+ 		return err
+ 	}
+ 
+ 	if bind {
+-		return bindMountDeviceNode(dest, node)
+		return bindMountDeviceNode(rootfs, dest, node)
+ 	}
+ 	if err := mknodDevice(dest, node); err != nil {
+ 		if os.IsExist(err) {
+ 			return nil
+ 		} else if os.IsPermission(err) {
+-			return bindMountDeviceNode(dest, node)
+			return bindMountDeviceNode(rootfs, dest, node)
+ 		}
+ 		return err
+ 	}
+@@ -955,55 +972,43 @@ func writeSystemProperty(key, value string) error {
+ }
+ 
+ func remount(m *configs.Mount, rootfs string) error {
+-	var (
+-		dest = m.Destination
+-	)
+-	if !strings.HasPrefix(dest, rootfs) {
+-		dest = filepath.Join(rootfs, dest)
+-	}
+-	return unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
+	return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+		return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "")
+	})
+ }
+ 
+ // Do the mount operation followed by additional mounts required to take care
+-// of propagation flags.
+// of propagation flags.  This will always be scoped inside the container rootfs.
+ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
+ 	var (
+-		dest  = m.Destination
+ 		data  = label.FormatMountLabel(m.Data, mountLabel)
+ 		flags = m.Flags
+ 	)
+-	if libcontainerUtils.CleanPath(dest) == "/dev" {
+	if utils.CleanPath(m.Destination) == "/dev" {
+ 		flags &= ^unix.MS_RDONLY
+ 	}
+ 
+-	copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
+-	if !(copyUp || strings.HasPrefix(dest, rootfs)) {
+-		dest = filepath.Join(rootfs, dest)
+-	}
+-
+-	if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
+-		return err
+-	}
+-
+-	for _, pflag := range m.PropagationFlags {
+-		if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
+-			return err
+	// Because the destination is inside a container path which might be
+	// mutating underneath us, we verify that we are actually going to mount
+	// inside the container with WithProcfd() -- mounting through a procfd
+	// mounts on the target.
+	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+		return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data)
+	}); err != nil {
+		return fmt.Errorf("mount through procfd: %v", err)
+	}
+	// We have to apply mount propagation flags in a separate WithProcfd() call
+	// because the previous call invalidates the passed procfd -- the mount
+	// target needs to be re-opened.
+	if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error {
+		for _, pflag := range m.PropagationFlags {
+			if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil {
+				return err
+			}
+ 		}
+-	}
+-	return nil
+-}
+-
+-func mountNewCgroup(m *configs.Mount) error {
+-	var (
+-		data   = m.Data
+-		source = m.Source
+-	)
+-	if data == "systemd" {
+-		data = cgroups.CgroupNamePrefix + data
+-		source = "systemd"
+-	}
+-	if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
+-		return err
+		return nil
+	}); err != nil {
+		return fmt.Errorf("change mount propagation through procfd: %v", err)
+ 	}
+ 	return nil
+ }
+diff --git a/libcontainer/utils/utils.go b/libcontainer/utils/utils.go
+index 40ccfaa1..c1418ef9 100644
+--- a/libcontainer/utils/utils.go
+++ b/libcontainer/utils/utils.go
+@@ -2,12 +2,15 @@ package utils
+ 
+ import (
+ 	"encoding/json"
+	"fmt"
+ 	"io"
+ 	"os"
+ 	"path/filepath"
+	"strconv"
+ 	"strings"
+ 	"unsafe"
+ 
+	securejoin "github.com/cyphar/filepath-securejoin"
+ 	"golang.org/x/sys/unix"
+ )
+ 
+@@ -73,6 +76,57 @@ func CleanPath(path string) string {
+ 	return filepath.Clean(path)
+ }
+ 
+// stripRoot returns the passed path, stripping the root path if it was
+// (lexically) inside it. Note that both passed paths will always be treated
+// as absolute, and the returned path will also always be absolute. In
+// addition, the paths are cleaned before stripping the root.
+func stripRoot(root, path string) string {
+	// Make the paths clean and absolute.
+	root, path = CleanPath("/"+root), CleanPath("/"+path)
+	switch {
+	case path == root:
+		path = "/"
+	case root == "/":
+		// do nothing
+	case strings.HasPrefix(path, root+"/"):
+		path = strings.TrimPrefix(path, root+"/")
+	}
+	return CleanPath("/" + path)
+}
+
+// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
+// corresponding to the unsafePath resolved within the root. Before passing the
+// fd, this path is verified to have been inside the root -- so operating on it
+// through the passed fdpath should be safe. Do not access this path through
+// the original path strings, and do not attempt to use the pathname outside of
+// the passed closure (the file handle will be freed once the closure returns).
+func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
+	// Remove the root then forcefully resolve inside the root.
+	unsafePath = stripRoot(root, unsafePath)
+	path, err := securejoin.SecureJoin(root, unsafePath)
+	if err != nil {
+		return fmt.Errorf("resolving path inside rootfs failed: %v", err)
+	}
+
+	// Open the target path.
+	fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return fmt.Errorf("open o_path procfd: %v", err)
+	}
+	defer fh.Close()
+
+	// Double-check the path is the one we expected.
+	procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
+	if realpath, err := os.Readlink(procfd); err != nil {
+		return fmt.Errorf("procfd verification failed: %v", err)
+	} else if realpath != path {
+		return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
+	}
+
+	// Run the closure.
+	return fn(procfd)
+}
+
+ // SearchLabels searches a list of key-value pairs for the provided key and
+ // returns the corresponding value. The pairs must be separated with '='.
+ func SearchLabels(labels []string, query string) string {
+diff --git a/libcontainer/utils/utils_test.go b/libcontainer/utils/utils_test.go
+index 395eedcf..5b80cac6 100644
+--- a/libcontainer/utils/utils_test.go
+++ b/libcontainer/utils/utils_test.go
+@@ -140,3 +140,38 @@ func TestCleanPath(t *testing.T) {
+ 		t.Errorf("expected to receive '/foo' and received %s", path)
+ 	}
+ }
+
+func TestStripRoot(t *testing.T) {
+	for _, test := range []struct {
+		root, path, out string
+	}{
+		// Works with multiple components.
+		{"/a/b", "/a/b/c", "/c"},
+		{"/hello/world", "/hello/world/the/quick-brown/fox", "/the/quick-brown/fox"},
+		// '/' must be a no-op.
+		{"/", "/a/b/c", "/a/b/c"},
+		// Must be the correct order.
+		{"/a/b", "/a/c/b", "/a/c/b"},
+		// Must be at start.
+		{"/abc/def", "/foo/abc/def/bar", "/foo/abc/def/bar"},
+		// Must be a lexical parent.
+		{"/foo/bar", "/foo/barSAMECOMPONENT", "/foo/barSAMECOMPONENT"},
+		// Must only strip the root once.
+		{"/foo/bar", "/foo/bar/foo/bar/baz", "/foo/bar/baz"},
+		// Deal with .. in a fairly sane way.
+		{"/foo/bar", "/foo/bar/../baz", "/foo/baz"},
+		{"/foo/bar", "../../../../../../foo/bar/baz", "/baz"},
+		{"/foo/bar", "/../../../../../../foo/bar/baz", "/baz"},
+		{"/foo/bar/../baz", "/foo/baz/bar", "/bar"},
+		{"/foo/bar/../baz", "/foo/baz/../bar/../baz/./foo", "/foo"},
+		// All paths are made absolute before stripping.
+		{"foo/bar", "/foo/bar/baz/bee", "/baz/bee"},
+		{"/foo/bar", "foo/bar/baz/beef", "/baz/beef"},
+		{"foo/bar", "foo/bar/baz/beets", "/baz/beets"},
+	} {
+		got := stripRoot(test.root, test.path)
+		if got != test.out {
+			t.Errorf("stripRoot(%q, %q) -- got %q, expected %q", test.root, test.path, got, test.out)
+		}
+	}
+}
+-- 
+2.31.1
+
--- a/SOURCES/1807.patch
+++ b/SOURCES/1807.patch
@ -1,168 +1,178 @@
-From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001
+From 3d99c51e1b38a440804a55c9f314f62cc50b8902 Mon Sep 17 00:00:00 2001
 From: Giuseppe Scrivano <gscrivan@redhat.com>
 Date: Fri, 25 May 2018 18:04:06 +0200
 Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create

 if NOTIFY_SOCKET is used, do not block the main runc process waiting
-for events on the notify socket.  Change the logic to create a new
-process that monitors exclusively the notify socket until an event is
-received.
+for events on the notify socket.  Bind mount the parent directory of
+the notify socket, so that "start" can create the socket and it is
+still accessible from the container.

 Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
 ---
- init.go          |  12 +++++++
- notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++---------
- signals.go       |   5 +--
- 3 files changed, 99 insertions(+), 19 deletions(-)
+ notify_socket.go | 112 ++++++++++++++++++++++++++++++++++-------------
+ signals.go       |   4 +-
+ start.go         |  13 +++++-
+ utils_linux.go   |  12 ++++-
+ 4 files changed, 105 insertions(+), 36 deletions(-)

-diff --git a/init.go b/init.go
-index c8f453192..6a3d9e91c 100644
--- a/init.go
-+++ b/init.go
-@@ -20,6 +20,18 @@ var initCommand = cli.Command{
- 	Name:  "init",
- 	Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
- 	Action: func(context *cli.Context) error {
-+		// If NOTIFY_SOCKET is used create a new process that stays around
-+		// so to not block "runc start".  It will automatically exits when the
-+		// container notifies that it is ready, or when the container is deleted
-+		if os.Getenv("_NOTIFY_SOCKET_FD") != "" {
-+			fd := os.Getenv("_NOTIFY_SOCKET_FD")
-+			pid := os.Getenv("_NOTIFY_SOCKET_PID")
-+			hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST")
-+			notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH")
-+			notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath)
-+			os.Exit(0)
-+		}
-+
- 		factory, _ := libcontainer.New("")
- 		if err := factory.StartInitialization(); err != nil {
- 			// as the error is sent back to the parent there is no need to log
 diff --git a/notify_socket.go b/notify_socket.go
-index cd6c0a989..e04e9d660 100644
+index e7453c62..d961453a 100644
 --- a/notify_socket.go
 +++ b/notify_socket.go
-@@ -6,10 +6,13 @@ import (
- 	"bytes"
+@@ -7,11 +7,13 @@ import (
 	"fmt"
 	"net"
-+	"os"
-+	"os/exec"
+ 	"os"
+	"path"
 	"path/filepath"
 +	"strconv"
 +	"time"
 
+	"github.com/opencontainers/runc/libcontainer"
 	"github.com/opencontainers/runtime-spec/specs-go"
 -
- 	"github.com/sirupsen/logrus"
+-	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
 )
-@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error {
+ 
+@@ -27,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
+ 	}
+ 
+ 	root := filepath.Join(context.GlobalString("root"), id)
+-	path := filepath.Join(root, "notify.sock")
+	socketPath := filepath.Join(root, "notify", "notify.sock")
+ 
+ 	notifySocket := &notifySocket{
+ 		socket:     nil,
+ 		host:       notifySocketHost,
+-		socketPath: path,
+		socketPath: socketPath,
+ 	}
+ 
+ 	return notifySocket
+@@ -44,13 +46,19 @@ func (s *notifySocket) Close() error {
+ 
+ // If systemd is supporting sd_notify protocol, this function will add support
+ // for sd_notify protocol from within the container.
+-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
+-	mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
+	pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
+	mount := specs.Mount{
+		Destination: path.Dir(pathInContainer),
+		Source:      path.Dir(s.socketPath),
+		Options:     []string{"bind", "nosuid", "noexec", "nodev", "ro"},
+	}
+ 	spec.Mounts = append(spec.Mounts, mount)
+-	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
+	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
+	return nil
+ }
+ 
+-func (s *notifySocket) setupSocket() error {
+func (s *notifySocket) bindSocket() error {
+ 	addr := net.UnixAddr{
+ 		Name: s.socketPath,
+ 		Net:  "unixgram",
+@@ -71,45 +79,89 @@ func (s *notifySocket) setupSocket() error {
 	return nil
 }
 
-+func (notifySocket *notifySocket) notifyNewPid(pid int) {
-+	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
-+	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
-+	if err != nil {
-+		return
-+	}
-+	newPid := fmt.Sprintf("MAINPID=%d\n", pid)
-+	client.Write([]byte(newPid))
+-// pid1 must be set only with -d, as it is used to set the new process as the main process
+-// for the service in systemd
+-func (s *notifySocket) run(pid1 int) {
+-	buf := make([]byte, 512)
+-	notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
+func (s *notifySocket) setupSocketDirectory() error {
+	return os.Mkdir(path.Dir(s.socketPath), 0755)
 +}
 +
- // pid1 must be set only with -d, as it is used to set the new process as the main process
- // for the service in systemd
- func (notifySocket *notifySocket) run(pid1 int) {
-	buf := make([]byte, 512)
-	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
-	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
-+	file, err := notifySocket.socket.File()
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
+	notifySocket := newNotifySocket(context, notifySocketHost, id)
+	if notifySocket == nil {
+		return nil, nil
+	}
+
+	if err := notifySocket.bindSocket(); err != nil {
+		return nil, err
+	}
+	return notifySocket, nil
+}
+
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
+	s, err := container.State()
+	if err != nil {
+		return err
+	}
+	return n.run(s.InitProcessPid)
+}
+
+func (n *notifySocket) run(pid1 int) error {
+	if n.socket == nil {
+		return nil
+	}
+	notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
+ 	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
 	if err != nil {
- 		logrus.Error(err)
- 		return
+-		logrus.Error(err)
+-		return
+		return err
 	}
 -	for {
-		r, err := notifySocket.socket.Read(buf)
+-		r, err := s.socket.Read(buf)
 -		if err != nil {
 -			break
-+	defer file.Close()
-+	defer notifySocket.socket.Close()
 +
-+	cmd := exec.Command("/proc/self/exe", "init")
-+	cmd.ExtraFiles = []*os.File{file}
-+	cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3",
-+		fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1),
-+		fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host),
-+		fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath))
-+
-+	if err := cmd.Start(); err != nil {
-+		logrus.Fatal(err)
-+	}
-+	notifySocket.notifyNewPid(cmd.Process.Pid)
-+	cmd.Process.Release()
-+}
-+
-+func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) {
-+	intFd, err := strconv.Atoi(envFd)
-+	if err != nil {
-+		return
-+	}
-+	pid1, err := strconv.Atoi(envPid)
-+	if err != nil {
-+		return
-+	}
-+
-+	file := os.NewFile(uintptr(intFd), "unixgram")
-+	defer file.Close()
+	ticker := time.NewTicker(time.Millisecond * 100)
+	defer ticker.Stop()
 +
 +	fileChan := make(chan []byte)
-+	exitChan := make(chan bool)
-+
 +	go func() {
 +		for {
 +			buf := make([]byte, 512)
-+			r, err := file.Read(buf)
+			r, err := n.socket.Read(buf)
 +			if err != nil {
 +				return
 +			}
-+			fileChan <- buf[0:r]
+			got := buf[0:r]
+			if !bytes.HasPrefix(got, []byte("READY=")) {
+				continue
+			}
+			fileChan <- got
+			return
 		}
 -		var out bytes.Buffer
 -		for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
 -			if bytes.HasPrefix(line, []byte("READY=")) {
 +	}()
-+	go func() {
-+		for {
-+			if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) {
-+				exitChan <- true
-+				return
-+			}
-+			time.Sleep(time.Second)
-+		}
-+	}()
-+
-+	notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"}
-+	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
-+	if err != nil {
-+		return
-+	}
 +
 +	for {
 +		select {
-+		case <-exitChan:
-+			return
+		case <-ticker.C:
+			_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
+			if err != nil {
+				return nil
+			}
 +		case b := <-fileChan:
 +			for _, line := range bytes.Split(b, []byte{'\n'}) {
-+				if !bytes.HasPrefix(line, []byte("READY=")) {
-+					continue
-+				}
-+
 +				var out bytes.Buffer
 				_, err = out.Write(line)
 				if err != nil {
- 					return
-@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) {
+-					return
+					return err
+ 				}
+ 
+ 				_, err = out.Write([]byte{'\n'})
+ 				if err != nil {
+-					return
+					return err
+ 				}
+ 
+ 				_, err = client.Write(out.Bytes())
+ 				if err != nil {
+-					return
+					return err
 				}
 
 				// now we can inform systemd to use pid1 as the pid to monitor
@ -170,25 +180,26 @@ index cd6c0a989..e04e9d660 100644
 -					newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
 -					client.Write([]byte(newPid))
 -				}
+-				return
 +				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
 +				client.Write([]byte(newPid))
- 				return
+				return nil
 			}
 		}
+ 	}
 diff --git a/signals.go b/signals.go
-index 1811de837..d0988cb39 100644
+index b67f65a0..dd25e094 100644
 --- a/signals.go
 +++ b/signals.go
-@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
+@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
 			h.notifySocket.run(pid1)
 			return 0, nil
- 		} else {
-			go h.notifySocket.run(0)
-+			h.notifySocket.run(os.Getpid())
 		}
+		h.notifySocket.run(os.Getpid())
+ 		go h.notifySocket.run(0)
 	}
 
-@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
+@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
 					// status because we must ensure that any of the go specific process
 					// fun such as flushing pipes are complete before we return.
 					process.Wait()
@ -198,3 +209,70 @@ index 1811de837..d0988cb39 100644
 					return e.status, nil
 				}
 			}
+diff --git a/start.go b/start.go
+index 2bb698b2..3a1769a4 100644
+--- a/start.go
+++ b/start.go
+@@ -3,6 +3,7 @@ package main
+ import (
+ 	"errors"
+ 	"fmt"
+	"os"
+ 
+ 	"github.com/opencontainers/runc/libcontainer"
+ 	"github.com/urfave/cli"
+@@ -31,7 +32,17 @@ your host.`,
+ 		}
+ 		switch status {
+ 		case libcontainer.Created:
+-			return container.Exec()
+			notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
+			if err != nil {
+				return err
+			}
+			if err := container.Exec(); err != nil {
+				return err
+			}
+			if notifySocket != nil {
+				return notifySocket.waitForContainer(container)
+			}
+			return nil
+ 		case libcontainer.Stopped:
+ 			return errors.New("cannot start a container that has stopped")
+ 		case libcontainer.Running:
+diff --git a/utils_linux.go b/utils_linux.go
+index 984e6b0f..46c26246 100644
+--- a/utils_linux.go
+++ b/utils_linux.go
+@@ -408,7 +408,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
+ 
+ 	notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
+ 	if notifySocket != nil {
+-		notifySocket.setupSpec(context, spec)
+		if err := notifySocket.setupSpec(context, spec); err != nil {
+			return -1, err
+		}
+ 	}
+ 
+ 	container, err := createContainer(context, id, spec)
+@@ -417,10 +419,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
+ 	}
+ 
+ 	if notifySocket != nil {
+-		err := notifySocket.setupSocket()
+		err := notifySocket.setupSocketDirectory()
+ 		if err != nil {
+ 			return -1, err
+ 		}
+		if action == CT_ACT_RUN {
+			err := notifySocket.bindSocket()
+			if err != nil {
+				return -1, err
+			}
+		}
+ 	}
+ 
+ 	// Support on-demand socket activation by passing file descriptors into the container init process.
+-- 
+2.21.0
+
--- a/SOURCES/99-containers.conf
+++ b/SOURCES/99-containers.conf
@ -1 +0,0 @@
-fs.may_detach_mounts=1
--- a/SOURCES/change-default-root.patch
+++ b/SOURCES/change-default-root.patch
@ -1,61 +0,0 @@
-diff --git a/list.go b/list.go
-index 0313d8c..328798b 100644
--- a/list.go
-+++ b/list.go
-@@ -50,7 +50,7 @@ var listCommand = cli.Command{
- 	ArgsUsage: `
- 
- Where the given root is specified via the global option "--root"
-(default: "/run/runc").
-+(default: "/run/runc-ctrs").
- 
- EXAMPLE 1:
- To list containers created via the default "--root":
-diff --git a/main.go b/main.go
-index 278399a..0f49fce 100644
--- a/main.go
-+++ b/main.go
-@@ -62,7 +62,7 @@ func main() {
- 	v = append(v, fmt.Sprintf("spec: %s", specs.Version))
- 	app.Version = strings.Join(v, "\n")
- 
-	root := "/run/runc"
-+	root := "/run/runc-ctrs"
- 	rootless, err := isRootless(nil)
- 	if err != nil {
- 		fatal(err)
-@@ -70,7 +70,7 @@ func main() {
- 	if rootless {
- 		runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
- 		if runtimeDir != "" {
-			root = runtimeDir + "/runc"
-+			root = runtimeDir + "/runc-ctrs"
- 			// According to the XDG specification, we need to set anything in
- 			// XDG_RUNTIME_DIR to have a sticky bit if we don't want it to get
- 			// auto-pruned.
-diff --git a/man/runc-list.8.md b/man/runc-list.8.md
-index f737424..107220e 100644
--- a/man/runc-list.8.md
-+++ b/man/runc-list.8.md
-@@ -6,7 +6,7 @@
- 
- # EXAMPLE
- Where the given root is specified via the global option "--root"
-(default: "/run/runc").
-+(default: "/run/runc-ctrs").
- 
- To list containers created via the default "--root":
-        # runc list
-diff --git a/man/runc.8.md b/man/runc.8.md
-index 6d0ddff..337bc73 100644
--- a/man/runc.8.md
-+++ b/man/runc.8.md
-@@ -51,7 +51,7 @@ value for "bundle" is the current directory.
-    --debug              enable debug output for logging
-    --log value          set the log file path where internal debug information is written (default: "/dev/null")
-    --log-format value   set the format used by logs ('text' (default), or 'json') (default: "text")
-   --root value         root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc" or $XDG_RUNTIME_DIR/runc for rootless containers)
-+   --root value         root directory for storage of container state (this should be located in tmpfs) (default: "/run/runc-ctrs" or $XDG_RUNTIME_DIR/runc-ctrs for rootless containers)
-    --criu value         path to the criu binary used for checkpoint and restore (default: "criu")
-    --systemd-cgroup     enable systemd cgroup support, expects cgroupsPath to be of form "slice:prefix:name" for e.g. "system.slice:runc:434234"
-    --rootless value    enable rootless mode ('true', 'false', or 'auto') (default: "auto")
--- a/SOURCES/pivot-root.patch
+++ b/SOURCES/pivot-root.patch
@ -1,72 +0,0 @@
-From 28a697cce3e4f905dca700eda81d681a30eef9cd Mon Sep 17 00:00:00 2001
-From: Giuseppe Scrivano <gscrivan@redhat.com>
-Date: Fri, 11 Jan 2019 21:53:45 +0100
-Subject: [PATCH] rootfs: umount all procfs and sysfs with --no-pivot
-
-When creating a new user namespace, the kernel doesn't allow to mount
-a new procfs or sysfs file system if there is not already one instance
-fully visible in the current mount namespace.
-
-When using --no-pivot we were effectively inhibiting this protection
-from the kernel, as /proc and /sys from the host are still present in
-the container mount namespace.
-
-A container without full access to /proc could then create a new user
-namespace, and from there able to mount a fully visible /proc, bypassing
-the limitations in the container.
-
-A simple reproducer for this issue is:
-
-unshare -mrfp sh -c "mount -t proc none /proc && echo c > /proc/sysrq-trigger"
-
-Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
- libcontainer/rootfs_linux.go | 35 +++++++++++++++++++++++++++++++++++
- 1 file changed, 35 insertions(+)
-
-diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go
-index e7c2f8ada..6bd6da74a 100644
--- a/libcontainer/rootfs_linux.go
-+++ b/libcontainer/rootfs_linux.go
-@@ -748,6 +748,41 @@ func pivotRoot(rootfs string) error {
- }
- 
- func msMoveRoot(rootfs string) error {
-+	mountinfos, err := mount.GetMounts()
-+	if err != nil {
-+		return err
-+	}
-+
-+	absRootfs, err := filepath.Abs(rootfs)
-+	if err != nil {
-+		return err
-+	}
-+
-+	for _, info := range mountinfos {
-+		p, err := filepath.Abs(info.Mountpoint)
-+		if err != nil {
-+			return err
-+		}
-+		// Umount every syfs and proc file systems, except those under the container rootfs
-+		if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
-+			continue
-+		}
-+		// Be sure umount events are not propagated to the host.
-+		if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
-+			return err
-+		}
-+		if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
-+			if err != unix.EINVAL && err != unix.EPERM {
-+				return err
-+			} else {
-+				// If we have not privileges for umounting (e.g. rootless), then
-+				// cover the path.
-+				if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
-+					return err
-+				}
-+			}
-+		}
-+	}
- 	if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
- 		return err
- 	}
--- a/SPECS/runc.spec
+++ b/SPECS/runc.spec
@ -12,41 +12,34 @@
 %if 0%{?rhel} > 7 && ! 0%{?fedora}
 %define gobuild(o:) \
 go build -buildmode pie -compiler gc -tags="rpm_crashtraceback no_openssl ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -compressdwarf=false -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \\n') -extldflags '%__global_ldflags'" -a -v -x %{?**};
-%endif # distro
+%endif

 %global provider github
 %global provider_tld com
 %global project opencontainers
 %global repo runc
 # https://github.com/opencontainers/runc
-%global provider_prefix %{provider}.%{provider_tld}/%{project}/%{repo}
-%global import_path %{provider_prefix}
-%global git0 https://github.com/opencontainers/runc
-%global commit0 2abd837c8c25b0102ac4ce14f17bc0bc7ddffba7
+%global import_path %{provider}.%{provider_tld}/%{project}/%{repo}
+%global git0 https://%{import_path}
+%global commit0 dc9208a3303feef5b3839f4323d9beb36df0a9dd
 %global shortcommit0 %(c=%{commit0}; echo ${c:0:7})

 Name: %{repo}
 Version: 1.0.0
-Release: 56.rc5.dev.git%{shortcommit0}%{?dist}
+Release: 66.rc10%{?dist}
 Summary: CLI for running Open Containers
 ExcludeArch: %{ix86}
 License: ASL 2.0
-URL: http//%{provider_prefix}
-Source0: %{git0}/archive/%{commit0}/%{repo}-%{shortcommit0}.tar.gz
-Source1: 99-containers.conf
-Patch0: change-default-root.patch
-Patch1: 0001-Revert-Apply-cgroups-earlier.patch
-Patch2: 1807.patch
-Patch3: 0001-nsenter-clone-proc-self-exe-to-avoid-exposing-host-b-runc.patch
-Patch4: pivot-root.patch
-Requires: criu
-Requires(pre): container-selinux >= 2:2.2-2
-
-# If go_compiler is not set to 1, there is no virtual provide. Use golang instead.
-BuildRequires: %{?go_compiler:compiler(go-compiler)}%{!?go_compiler:golang} >= 1.6.2
+URL: %{git0}
+Source0: %{git0}/archive/%{commit0}/%{name}-%{shortcommit0}.tar.gz
+Patch0: 1807.patch
+Patch1: 0001-rootfs-add-mount-destination-validation.patch
+BuildRequires: golang >= 1.12.12-4
 BuildRequires: git
 BuildRequires: go-md2man
 BuildRequires: libseccomp-devel
+Requires: criu
+Requires(pre): container-selinux >= 2:2.2-2

 %description
 The runc command can be used to start containers which are packaged
@ -65,9 +58,10 @@ pushd GOPATH
 popd

 pushd GOPATH/src/%{import_path}
+export GO111MODULE=off
 export GOPATH=%{gopath}:$(pwd)/GOPATH
 export BUILDTAGS="selinux seccomp"
-%gobuild -o %{name} %{import_path} 
+%gobuild -o %{name} %{import_path}

 pushd man
 ./md2man-all.sh
@ -97,12 +91,45 @@ install -p -m 0644 contrib/completions/bash/%{name} %{buildroot}%{_datadir}/bash
 %{_datadir}/bash-completion/completions/%{name}

 %changelog
-* Thu Nov 28 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
- rebuild because of CVE-2019-9512 and CVE-2019-9514
- Resolves: #1766328, #1766300
+* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-66.rc10
+- set GO111MODULE=off to fix build
+- Related: #1955651
+
+* Wed May 19 2021 Jindrich Novy <jnovy@redhat.com> - 1.0.0-65.rc10
+- fix CVE-2021-30465
+- Resolves: #1955651
+
+* Thu Feb 13 2020 Jindrich Novy <jnovy@redhat.com> - 1.0.0-64.rc10
+- address CVE-2019-19921 by updating to rc10
+- Resolves: #1801888
+
+* Wed Dec 11 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-63.rc9
+- use no_openssl in BUILDTAGS (no vendored crypto in runc)
+- Related: RHELPLAN-25139
+
+* Mon Dec 09 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-62.rc9
+- be sure to use golang >= 1.12.12-4
+- Related: RHELPLAN-25139
+
+* Thu Nov 21 2019 Jindrich Novy <jnovy@redhat.com> - 1.0.0-61.rc9
+- update to runc 1.0.0-rc9 release
+- amend golang deps
+- fixes CVE-2019-16884
+- Resolves: #1759651
+
+* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-60.rc8
+- Resolves: #1721247 - enable fips mode
+
+* Mon Jun 17 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-59.rc8
+- Resolves: #1720654 - rebase to v1.0.0-rc8
+
+* Thu Apr 11 2019 Eduardo Santiago <santiago@redhat.com> - 1.0.0-57.rc5.dev.git2abd837
+- Resolves: #1693424 - podman rootless: cannot specify gid= mount options
+
+* Wed Feb 27 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-56.rc5.dev.git2abd837
+- change-default-root patch not needed as there's no docker on rhel8

 * Tue Feb 12 2019 Lokesh Mandvekar <lsm5@redhat.com> - 1.0.0-55.rc5.dev.git2abd837
- Resolves: #1665770 - rootfs: umount all procfs and sysfs with --no-pivot
 - Resolves: CVE-2019-5736

 * Tue Dec 18 2018 Frantisek Kluknavsky <fkluknav@redhat.com> - 1.0.0-54.rc5.dev.git2abd837