a5bd08701a
- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] - kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] - kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] - kvm-vhost-coding-style-fix.patch [bz#1779041] - kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] - kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] - kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] - kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] - kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] - kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] - kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] - kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] - kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] - kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] - kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] - kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] - kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] - kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] - kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] - kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] - kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] - kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] - kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] - kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] - kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] - kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] - kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] - kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] - kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] - kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] - kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] 
- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] - kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] - kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] - kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] - kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] - kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] - kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] - kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] - kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] - kvm-virtiofsd-validate-path-components.patch [bz#1694164] - kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] - kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] - kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] - kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] - kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] - kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] - kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] - kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] - kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] - kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] - kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] - kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] - kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] - 
kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] - kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] - kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] - kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] - kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] - kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] - kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] - kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] - kvm-virtiofsd-Handle-reinit.patch [bz#1694164] - kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] - kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] - kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] - kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] - kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] - kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] - kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] - kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] - kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] - kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] - kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] - kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] - kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] - kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] - 
kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] - kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] - kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] - kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] - kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] - kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] - kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] - kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] - kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] - kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] - kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] - kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] - kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] - kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] - kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] - kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] - kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] - kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] - kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] - kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] - kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] - kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] - kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] - kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] - 
kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] - Resolves: bz#1694164 (virtio-fs: host<->guest shared file system (qemu)) - Resolves: bz#1725084 (aarch64: support dumping SVE registers) - Resolves: bz#1779041 (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic)
224 lines
7.2 KiB
Diff
224 lines
7.2 KiB
Diff
From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001
|
|
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
|
|
Date: Mon, 27 Jan 2020 19:01:29 +0100
|
|
Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
Message-id: <20200127190227.40942-55-dgilbert@redhat.com>
|
|
Patchwork-id: 93510
|
|
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace
|
|
Bugzilla: 1694164
|
|
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
|
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
|
|
|
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
|
virtiofsd needs access to /proc/self/fd. Let's move to a new pid
|
|
namespace so that a compromised process cannot see other
|
|
processes running on the system.
|
|
|
|
One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child*
|
|
processes and not the current process. Therefore we need to fork the
|
|
pid 1 process that will actually run virtiofsd and leave a parent in
|
|
waitpid(2). This is not the same thing as daemonization and parent
|
|
processes should not notice a difference.
|
|
|
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
|
|
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc)
|
|
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
---
|
|
tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++--------------
|
|
1 file changed, 86 insertions(+), 48 deletions(-)
|
|
|
|
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
|
|
index 27ab328..0947d14 100644
|
|
--- a/tools/virtiofsd/passthrough_ll.c
|
|
+++ b/tools/virtiofsd/passthrough_ll.c
|
|
@@ -51,7 +51,10 @@
|
|
#include <string.h>
|
|
#include <sys/file.h>
|
|
#include <sys/mount.h>
|
|
+#include <sys/prctl.h>
|
|
#include <sys/syscall.h>
|
|
+#include <sys/types.h>
|
|
+#include <sys/wait.h>
|
|
#include <sys/xattr.h>
|
|
#include <unistd.h>
|
|
|
|
@@ -1945,24 +1948,95 @@ static void print_capabilities(void)
|
|
}
|
|
|
|
/*
|
|
- * Called after our UNIX domain sockets have been created, now we can move to
|
|
- * an empty network namespace to prevent TCP/IP and other network activity in
|
|
- * case this process is compromised.
|
|
+ * Move to new mount, net, and pid namespaces to isolate this process.
|
|
*/
|
|
-static void setup_net_namespace(void)
|
|
+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
|
|
{
|
|
- if (unshare(CLONE_NEWNET) != 0) {
|
|
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n");
|
|
+ pid_t child;
|
|
+
|
|
+ /*
|
|
+ * Create a new pid namespace for *child* processes. We'll have to
|
|
+ * fork in order to enter the new pid namespace. A new mount namespace
|
|
+ * is also needed so that we can remount /proc for the new pid
|
|
+ * namespace.
|
|
+ *
|
|
+ * Our UNIX domain sockets have been created. Now we can move to
|
|
+ * an empty network namespace to prevent TCP/IP and other network
|
|
+ * activity in case this process is compromised.
|
|
+ */
|
|
+ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
|
|
+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ child = fork();
|
|
+ if (child < 0) {
|
|
+ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
|
|
+ exit(1);
|
|
+ }
|
|
+ if (child > 0) {
|
|
+ pid_t waited;
|
|
+ int wstatus;
|
|
+
|
|
+ /* The parent waits for the child */
|
|
+ do {
|
|
+ waited = waitpid(child, &wstatus, 0);
|
|
+ } while (waited < 0 && errno == EINTR && !se->exited);
|
|
+
|
|
+ /* We were terminated by a signal, see fuse_signals.c */
|
|
+ if (se->exited) {
|
|
+ exit(0);
|
|
+ }
|
|
+
|
|
+ if (WIFEXITED(wstatus)) {
|
|
+ exit(WEXITSTATUS(wstatus));
|
|
+ }
|
|
+
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
|
|
+ prctl(PR_SET_PDEATHSIG, SIGTERM);
|
|
+
|
|
+ /*
|
|
+ * If the mounts have shared propagation then we want to opt out so our
|
|
+ * mount changes don't affect the parent mount namespace.
|
|
+ */
|
|
+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
|
|
+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ /* The child must remount /proc to use the new pid namespace */
|
|
+ if (mount("proc", "/proc", "proc",
|
|
+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
|
|
+ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ /* Now we can get our /proc/self/fd directory file descriptor */
|
|
+ lo->proc_self_fd = open("/proc/self/fd", O_PATH);
|
|
+ if (lo->proc_self_fd == -1) {
|
|
+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
-/* This magic is based on lxc's lxc_pivot_root() */
|
|
-static void setup_pivot_root(const char *source)
|
|
+/*
|
|
+ * Make the source directory our root so symlinks cannot escape and no other
|
|
+ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
|
|
+ */
|
|
+static void setup_mounts(const char *source)
|
|
{
|
|
int oldroot;
|
|
int newroot;
|
|
|
|
+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
|
|
+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ /* This magic is based on lxc's lxc_pivot_root() */
|
|
oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
|
|
if (oldroot < 0) {
|
|
fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
|
|
@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source)
|
|
close(oldroot);
|
|
}
|
|
|
|
-static void setup_proc_self_fd(struct lo_data *lo)
|
|
-{
|
|
- lo->proc_self_fd = open("/proc/self/fd", O_PATH);
|
|
- if (lo->proc_self_fd == -1) {
|
|
- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
|
|
- exit(1);
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * Make the source directory our root so symlinks cannot escape and no other
|
|
- * files are accessible.
|
|
- */
|
|
-static void setup_mount_namespace(const char *source)
|
|
-{
|
|
- if (unshare(CLONE_NEWNS) != 0) {
|
|
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n");
|
|
- exit(1);
|
|
- }
|
|
-
|
|
- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
|
|
- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n");
|
|
- exit(1);
|
|
- }
|
|
-
|
|
- if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
|
|
- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
|
|
- exit(1);
|
|
- }
|
|
-
|
|
- setup_pivot_root(source);
|
|
-}
|
|
-
|
|
/*
|
|
* Lock down this process to prevent access to other processes or files outside
|
|
* source directory. This reduces the impact of arbitrary code execution bugs.
|
|
*/
|
|
-static void setup_sandbox(struct lo_data *lo)
|
|
+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
|
|
{
|
|
- setup_net_namespace();
|
|
- setup_mount_namespace(lo->source);
|
|
+ setup_namespaces(lo, se);
|
|
+ setup_mounts(lo->source);
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[])
|
|
|
|
fuse_daemonize(opts.foreground);
|
|
|
|
- /* Must be after daemonize to get the right /proc/self/fd */
|
|
- setup_proc_self_fd(&lo);
|
|
-
|
|
- setup_sandbox(&lo);
|
|
+ setup_sandbox(&lo, se);
|
|
|
|
/* Block until ctrl+c or fusermount -u */
|
|
ret = virtio_loop(se);
|
|
--
|
|
1.8.3.1
|
|
|