* Fri Jan 31 2020 Miroslav Rezanina <mrezanin@redhat.com> - 4.2.0-8.el8

- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084]
- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041]
- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041]
- kvm-vhost-coding-style-fix.patch [bz#1779041]
- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164]
- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164]
- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164]
- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164]
- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164]
- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164]
- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164]
- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164]
- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164]
- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164]
- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164]
- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164]
- kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164]
- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164]
- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164]
- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164]
- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164]
- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164]
- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164]
- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164]
- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164]
- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164]
- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164]
- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164]
- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164]
- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164]
- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164]
- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164]
- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164]
- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164]
- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164]
- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164]
- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164]
- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164]
- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164]
- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164]
- kvm-virtiofsd-validate-path-components.patch [bz#1694164]
- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164]
- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164]
- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164]
- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164]
- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164]
- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164]
- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164]
- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164]
- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164]
- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164]
- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164]
- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164]
- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164]
- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164]
- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164]
- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164]
- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164]
- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164]
- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164]
- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164]
- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164]
- kvm-virtiofsd-Handle-reinit.patch [bz#1694164]
- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164]
- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164]
- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164]
- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164]
- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164]
- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164]
- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164]
- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164]
- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164]
- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164]
- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164]
- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164]
- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164]
- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164]
- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164]
- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164]
- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164]
- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164]
- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164]
- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164]
- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164]
- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164]
- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164]
- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164]
- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164]
- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164]
- kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164]
- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164]
- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164]
- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164]
- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164]
- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164]
- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164]
- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164]
- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164]
- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164]
- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164]
- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164]
- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164]
- Resolves: bz#1694164
  (virtio-fs: host<->guest shared file system (qemu))
- Resolves: bz#1725084
  (aarch64: support dumping SVE registers)
- Resolves: bz#1779041
  (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic)
This commit is contained in:
Miroslav Rezanina 2020-01-31 11:12:06 +01:00
parent 4508bb8a0c
commit a5bd08701a
116 changed files with 45410 additions and 4 deletions

View File

@ -0,0 +1,63 @@
From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:02 +0100
Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-88-dgilbert@redhat.com>
Patchwork-id: 93542
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
For fuse's queueinfo, both the queueinfo array and the individual queueinfo
entries are allocated in fv_queue_set_started() but are not freed when the
daemon process quits. Free each entry in fv_queue_cleanup_thread() and the array in virtio_session_close().
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index b7948de..fb8d6d1 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
}
close(ourqi->kill_fd);
ourqi->kick_fd = -1;
+ free(vud->qi[qidx]);
+ vud->qi[qidx] = NULL;
}
/* Callback from libvhost-user on start or stop of a queue */
@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se)
void virtio_session_close(struct fuse_session *se)
{
close(se->vu_socketfd);
+
+ if (!se->virtio_dev) {
+ return;
+ }
+
+ free(se->virtio_dev->qi);
free(se->virtio_dev);
se->virtio_dev = NULL;
}
--
1.8.3.1

View File

@ -0,0 +1,137 @@
From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:38 +0100
Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-4-dgilbert@redhat.com>
Patchwork-id: 93459
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Paolo Bonzini <pbonzini@redhat.com>
Since we are actually testing for the newer capng library, rename the
symbol to match.
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
configure | 2 +-
qemu-bridge-helper.c | 6 +++---
scsi/qemu-pr-helper.c | 12 ++++++------
3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/configure b/configure
index 16564f8..7831618 100755
--- a/configure
+++ b/configure
@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then
echo "CONFIG_L2TPV3=y" >> $config_host_mak
fi
if test "$cap_ng" = "yes" ; then
- echo "CONFIG_LIBCAP=y" >> $config_host_mak
+ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak
fi
echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak
for drv in $audio_drv_list; do
diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c
index 3d50ec0..88b2674 100644
--- a/qemu-bridge-helper.c
+++ b/qemu-bridge-helper.c
@@ -43,7 +43,7 @@
#include "net/tap-linux.h"
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
#include <cap-ng.h>
#endif
@@ -207,7 +207,7 @@ static int send_fd(int c, int fd)
return sendmsg(c, &msg, 0);
}
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
static int drop_privileges(void)
{
/* clear all capabilities */
@@ -246,7 +246,7 @@ int main(int argc, char **argv)
int access_allowed, access_denied;
int ret = EXIT_SUCCESS;
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
/* if we're run from an suid binary, immediately drop privileges preserving
* cap_net_admin */
if (geteuid() == 0 && getuid() != geteuid()) {
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index debb18f..0659cee 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -24,7 +24,7 @@
#include <linux/dm-ioctl.h>
#include <scsi/sg.h>
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
#include <cap-ng.h>
#endif
#include <pwd.h>
@@ -70,7 +70,7 @@ static int num_active_sockets = 1;
static int noisy;
static int verbose;
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
static int uid = -1;
static int gid = -1;
#endif
@@ -97,7 +97,7 @@ static void usage(const char *name)
" (default '%s')\n"
" -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
" specify tracing options\n"
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
" -u, --user=USER user to drop privileges to\n"
" -g, --group=GROUP group to drop privileges to\n"
#endif
@@ -827,7 +827,7 @@ static void close_server_socket(void)
num_active_sockets--;
}
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
static int drop_privileges(void)
{
/* clear all capabilities */
@@ -920,7 +920,7 @@ int main(int argc, char **argv)
pidfile = g_strdup(optarg);
pidfile_specified = true;
break;
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
case 'u': {
unsigned long res;
struct passwd *userinfo = getpwnam(optarg);
@@ -1056,7 +1056,7 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
-#ifdef CONFIG_LIBCAP
+#ifdef CONFIG_LIBCAP_NG
if (drop_privileges() < 0) {
error_report("Failed to drop privileges: %s", strerror(errno));
exit(EXIT_FAILURE);
--
1.8.3.1

View File

@ -0,0 +1,134 @@
From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:44 +0100
Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-70-dgilbert@redhat.com>
Patchwork-id: 93523
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
In future patches we'll be performing commands on the slave-fd driven
by commands on queues. Since those queues will be driven by individual
threads, we need to make sure they don't attempt to use the slave-fd
for multiple commands in parallel.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++----
contrib/libvhost-user/libvhost-user.h | 3 +++
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index ec27b78..63e4106 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
return vu_message_write(dev, conn_fd, vmsg);
}
+/*
+ * Processes a reply on the slave channel.
+ * Entered with slave_mutex held and releases it before exit.
+ * Returns true on success.
+ */
static bool
vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
{
VhostUserMsg msg_reply;
+ bool result = false;
if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
- return true;
+ result = true;
+ goto out;
}
if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
- return false;
+ goto out;
}
if (msg_reply.request != vmsg->request) {
DPRINT("Received unexpected msg type. Expected %d received %d",
vmsg->request, msg_reply.request);
- return false;
+ goto out;
}
- return msg_reply.payload.u64 == 0;
+ result = msg_reply.payload.u64 == 0;
+
+out:
+ pthread_mutex_unlock(&dev->slave_mutex);
+ return result;
}
/* Kick the log_call_fd if required. */
@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
return false;
}
+ pthread_mutex_lock(&dev->slave_mutex);
if (!vu_message_write(dev, dev->slave_fd, &vmsg)) {
+ pthread_mutex_unlock(&dev->slave_mutex);
return false;
}
+ /* Also unlocks the slave_mutex */
return vu_process_message_reply(dev, &vmsg);
}
@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev)
close(dev->slave_fd);
dev->slave_fd = -1;
}
+ pthread_mutex_destroy(&dev->slave_mutex);
if (dev->sock != -1) {
close(dev->sock);
@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev,
dev->remove_watch = remove_watch;
dev->iface = iface;
dev->log_call_fd = -1;
+ pthread_mutex_init(&dev->slave_mutex, NULL);
dev->slave_fd = -1;
dev->max_queues = max_queues;
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 46b6007..1844b6f 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -19,6 +19,7 @@
#include <stddef.h>
#include <sys/poll.h>
#include <linux/vhost.h>
+#include <pthread.h>
#include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
@@ -355,6 +356,8 @@ struct VuDev {
VuVirtq *vq;
VuDevInflightInfo inflight_info;
int log_call_fd;
+ /* Must be held while using slave_fd */
+ pthread_mutex_t slave_mutex;
int slave_fd;
uint64_t log_size;
uint8_t *log_table;
--
1.8.3.1

View File

@ -0,0 +1,117 @@
From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:09 +0100
Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-95-dgilbert@redhat.com>
Patchwork-id: 93548
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
If a new setmemtable command comes in once the vhost threads are
running, it will remap the guest's address space and the threads
will now be looking in the wrong place.
Fortunately, we're running this command under lock, so we can
update the queue mappings so that threads will look in the new, correct
place.
Note: This doesn't fix things that the threads might be doing
without a lock (e.g. a readv/writev); that's for another time.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++--------
contrib/libvhost-user/libvhost-user.h | 3 +++
2 files changed, 28 insertions(+), 8 deletions(-)
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 63e4106..b89bf18 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
}
static bool
+map_ring(VuDev *dev, VuVirtq *vq)
+{
+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr);
+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr);
+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr);
+
+ DPRINT("Setting virtq addresses:\n");
+ DPRINT(" vring_desc at %p\n", vq->vring.desc);
+ DPRINT(" vring_used at %p\n", vq->vring.used);
+ DPRINT(" vring_avail at %p\n", vq->vring.avail);
+
+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail);
+}
+
+static bool
vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
{
int i;
@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
close(vmsg->fds[i]);
}
+ for (i = 0; i < dev->max_queues; i++) {
+ if (dev->vq[i].vring.desc) {
+ if (map_ring(dev, &dev->vq[i])) {
+ vu_panic(dev, "remaping queue %d during setmemtable", i);
+ }
+ }
+ }
+
return false;
}
@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr);
DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr);
+ vq->vra = *vra;
vq->vring.flags = vra->flags;
- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
- vq->vring.used = qva_to_va(dev, vra->used_user_addr);
- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
vq->vring.log_guest_addr = vra->log_guest_addr;
- DPRINT("Setting virtq addresses:\n");
- DPRINT(" vring_desc at %p\n", vq->vring.desc);
- DPRINT(" vring_used at %p\n", vq->vring.used);
- DPRINT(" vring_avail at %p\n", vq->vring.avail);
- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
+ if (map_ring(dev, vq)) {
vu_panic(dev, "Invalid vring_addr message");
return false;
}
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 1844b6f..5cb7708 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -327,6 +327,9 @@ typedef struct VuVirtq {
int err_fd;
unsigned int enable;
bool started;
+
+ /* Guest addresses of our ring */
+ struct vhost_vring_addr vra;
} VuVirtq;
enum VuWatchCondtion {
--
1.8.3.1

View File

@ -0,0 +1,298 @@
From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 24 Jan 2020 09:14:34 +0100
Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes
RH-Author: Andrew Jones <drjones@redhat.com>
Message-id: <20200124091434.15021-2-drjones@redhat.com>
Patchwork-id: 93443
O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes
Bugzilla: 1725084
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084
Author: Andrew Jones <drjones@redhat.com>
Date: Thu, 23 Jan 2020 15:22:40 +0000
target/arm/arch_dump: Add SVE notes
When dumping a guest with dump-guest-memory, also dump the SVE
registers if they are in use.
Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200120101832.18781-1-drjones@redhat.com
[PMM: fixed checkpatch nits]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
include/elf.h | 1 +
target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++-
target/arm/cpu.h | 25 ++++++++++
target/arm/kvm64.c | 24 ----------
4 files changed, 148 insertions(+), 26 deletions(-)
diff --git a/include/elf.h b/include/elf.h
index 3501e0c..8fbfe60 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr {
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */
/*
* Physical entry point into the kernel.
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 26a2c09..2345dec 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state {
QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528);
+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */
+struct aarch64_user_sve_header {
+ uint32_t size;
+ uint32_t max_size;
+ uint16_t vl;
+ uint16_t max_vl;
+ uint16_t flags;
+ uint16_t reserved;
+} QEMU_PACKED;
+
struct aarch64_note {
Elf64_Nhdr hdr;
char name[8]; /* align_up(sizeof("CORE"), 4) */
union {
struct aarch64_elf_prstatus prstatus;
struct aarch64_user_vfp_state vfp;
+ struct aarch64_user_sve_header sve;
};
} QEMU_PACKED;
@@ -76,6 +87,8 @@ struct aarch64_note {
(AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus))
#define AARCH64_PRFPREG_NOTE_SIZE \
(AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state))
+#define AARCH64_SVE_NOTE_SIZE(env) \
+ (AARCH64_NOTE_HEADER_SIZE + sve_size(env))
static void aarch64_note_init(struct aarch64_note *note, DumpState *s,
const char *name, Elf64_Word namesz,
@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
return 0;
}
+#ifdef TARGET_AARCH64
+static off_t sve_zreg_offset(uint32_t vq, int n)
+{
+ off_t off = sizeof(struct aarch64_user_sve_header);
+ return ROUND_UP(off, 16) + vq * 16 * n;
+}
+
+static off_t sve_preg_offset(uint32_t vq, int n)
+{
+ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
+}
+
+static off_t sve_fpsr_offset(uint32_t vq)
+{
+ off_t off = sve_preg_offset(vq, 17);
+ return ROUND_UP(off, 16);
+}
+
+static off_t sve_fpcr_offset(uint32_t vq)
+{
+ return sve_fpsr_offset(vq) + sizeof(uint32_t);
+}
+
+static uint32_t sve_current_vq(CPUARMState *env)
+{
+ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
+}
+
+static size_t sve_size_vq(uint32_t vq)
+{
+ off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t);
+ return ROUND_UP(off, 16);
+}
+
+static size_t sve_size(CPUARMState *env)
+{
+ return sve_size_vq(sve_current_vq(env));
+}
+
+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
+ CPUARMState *env, int cpuid,
+ DumpState *s)
+{
+ struct aarch64_note *note;
+ ARMCPU *cpu = env_archcpu(env);
+ uint32_t vq = sve_current_vq(env);
+ uint64_t tmp[ARM_MAX_VQ * 2], *r;
+ uint32_t fpr;
+ uint8_t *buf;
+ int ret, i;
+
+ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env));
+ buf = (uint8_t *)&note->sve;
+
+ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq));
+
+ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq));
+ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq));
+ note->sve.vl = cpu_to_dump16(s, vq * 16);
+ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16);
+ note->sve.flags = cpu_to_dump16(s, 1);
+
+ for (i = 0; i < 32; ++i) {
+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
+ }
+
+ for (i = 0; i < 17; ++i) {
+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+ DIV_ROUND_UP(vq * 2, 8));
+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
+ }
+
+ fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
+ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t));
+
+ fpr = cpu_to_dump32(s, vfp_get_fpcr(env));
+ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t));
+
+ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s);
+ g_free(note);
+
+ if (ret < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
int cpuid, void *opaque)
{
struct aarch64_note note;
- CPUARMState *env = &ARM_CPU(cs)->env;
+ ARMCPU *cpu = ARM_CPU(cs);
+ CPUARMState *env = &cpu->env;
DumpState *s = opaque;
uint64_t pstate, sp;
int ret, i;
@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs,
return -1;
}
- return aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s);
+ if (ret) {
+ return ret;
+ }
+
+#ifdef TARGET_AARCH64
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ ret = aarch64_write_elf64_sve(f, env, cpuid, s);
+ }
+#endif
+
+ return ret;
}
/* struct pt_regs from arch/arm/include/asm/ptrace.h */
@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus)
if (class == ELFCLASS64) {
note_size = AARCH64_PRSTATUS_NOTE_SIZE;
note_size += AARCH64_PRFPREG_NOTE_SIZE;
+#ifdef TARGET_AARCH64
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ note_size += AARCH64_SVE_NOTE_SIZE(env);
+ }
+#endif
} else {
note_size = ARM_PRSTATUS_NOTE_SIZE;
if (arm_feature(env, ARM_FEATURE_VFP)) {
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 83a809d..82dd3cc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
void aarch64_sve_change_el(CPUARMState *env, int old_el,
int new_el, bool el0_a64);
void aarch64_add_sve_properties(Object *obj);
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian hosts.
+ */
+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ dst[i] = bswap64(src[i]);
+ }
+
+ return dst;
+#else
+ return src;
+#endif
+}
+
#else
static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
static inline void aarch64_sve_change_el(CPUARMState *env, int o,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 876184b..e2da756 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
}
/*
- * SVE registers are encoded in KVM's memory in an endianness-invariant format.
- * The byte at offset i from the start of the in-memory representation contains
- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
- * lowest offsets are stored in the lowest memory addresses, then that nearly
- * matches QEMU's representation, which is to use an array of host-endian
- * uint64_t's, where the lower offsets are at the lower indices. To complete
- * the translation we just need to byte swap the uint64_t's on big-endian hosts.
- */
-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
-{
-#ifdef HOST_WORDS_BIGENDIAN
- int i;
-
- for (i = 0; i < nr; ++i) {
- dst[i] = bswap64(src[i]);
- }
-
- return dst;
-#else
- return src;
-#endif
-}
-
-/*
* KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
* and PREGS and the FFR have a slice size of 256 bits. However we simply hard
* code the slice index to zero for now as it's unlikely we'll need more than
--
1.8.3.1

View File

@ -0,0 +1,53 @@
From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Fri, 24 Jan 2020 19:46:11 +0100
Subject: [PATCH 002/116] vhost: Add names to section rounded warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200124194613.41119-2-dgilbert@redhat.com>
Patchwork-id: 93450
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning
Bugzilla: 1779041
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Add the memory region names to section rounding/alignment
warnings.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20200116202414.157959-2-dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4da0d5a..774d87d 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev,
* match up in the same RAMBlock if they do.
*/
if (mrs_gpa < prev_gpa_start) {
- error_report("%s:Section rounded to %"PRIx64
- " prior to previous %"PRIx64,
- __func__, mrs_gpa, prev_gpa_start);
+ error_report("%s:Section '%s' rounded to %"PRIx64
+ " prior to previous '%s' %"PRIx64,
+ __func__, section->mr->name, mrs_gpa,
+ prev_sec->mr->name, prev_gpa_start);
/* A way to cleanly fail here would be better */
return;
}
--
1.8.3.1

View File

@ -0,0 +1,97 @@
From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Fri, 24 Jan 2020 19:46:12 +0100
Subject: [PATCH 003/116] vhost: Only align sections for vhost-user
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200124194613.41119-3-dgilbert@redhat.com>
Patchwork-id: 93452
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user
Bugzilla: 1779041
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
I added hugepage alignment code in c1ece84e7c9 to deal with
vhost-user + postcopy, which needs aligned pages when using userfault.
However, on x86 the lower 2MB of address space tends to be shotgun'd
with small fragments around the 512-640k range - e.g. video RAM - and
HyperV synic pages also tend to sit around there, again splitting
it up. The alignment code complains with a 'Section rounded to ...'
error and gives up.
Since vhost-user already filters out devices without an fd
(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be
affected by those overlaps.
Turn the alignment off on vhost-kernel so that it doesn't try
and align, and thus won't hit the rounding issues.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20200116202414.157959-3-dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 774d87d..25fd469 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev,
uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
section->offset_within_region;
RAMBlock *mrs_rb = section->mr->ram_block;
- size_t mrs_page = qemu_ram_pagesize(mrs_rb);
trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
mrs_host);
- /* Round the section to it's page size */
- /* First align the start down to a page boundary */
- uint64_t alignage = mrs_host & (mrs_page - 1);
- if (alignage) {
- mrs_host -= alignage;
- mrs_size += alignage;
- mrs_gpa -= alignage;
- }
- /* Now align the size up to a page boundary */
- alignage = mrs_size & (mrs_page - 1);
- if (alignage) {
- mrs_size += mrs_page - alignage;
- }
- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size,
- mrs_host);
+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+ /* Round the section to it's page size */
+ /* First align the start down to a page boundary */
+ size_t mrs_page = qemu_ram_pagesize(mrs_rb);
+ uint64_t alignage = mrs_host & (mrs_page - 1);
+ if (alignage) {
+ mrs_host -= alignage;
+ mrs_size += alignage;
+ mrs_gpa -= alignage;
+ }
+ /* Now align the size up to a page boundary */
+ alignage = mrs_size & (mrs_page - 1);
+ if (alignage) {
+ mrs_size += mrs_page - alignage;
+ }
+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size,
+ mrs_host);
+ }
if (dev->n_tmp_sections) {
/* Since we already have at least one section, lets see if
--
1.8.3.1

View File

@ -0,0 +1,56 @@
From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Fri, 24 Jan 2020 19:46:13 +0100
Subject: [PATCH 004/116] vhost: coding style fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200124194613.41119-4-dgilbert@redhat.com>
Patchwork-id: 93453
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix
Bugzilla: 1779041
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
From: "Michael S. Tsirkin" <mst@redhat.com>
Drop a trailing whitespace. Make line shorter.
Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 25fd469..9edfadc 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev,
trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
mrs_host);
- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
/* Round the section to it's page size */
/* First align the start down to a page boundary */
size_t mrs_page = qemu_ram_pagesize(mrs_rb);
@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev,
if (alignage) {
mrs_size += mrs_page - alignage;
}
- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size,
- mrs_host);
+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa,
+ mrs_size, mrs_host);
}
if (dev->n_tmp_sections) {
--
1.8.3.1

View File

@ -0,0 +1,48 @@
From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:43 +0100
Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-69-dgilbert@redhat.com>
Patchwork-id: 93519
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
When we receive an unexpected message type on the slave fd, print
the type.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost-user.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 02a9b25..e4f46ec 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque)
fd[0]);
break;
default:
- error_report("Received unexpected msg type.");
+ error_report("Received unexpected msg type: %d.", hdr.request);
ret = -EINVAL;
}
--
1.8.3.1

View File

@ -0,0 +1,59 @@
From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:37 +0100
Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-3-dgilbert@redhat.com>
Patchwork-id: 93458
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Marc-André Lureau <marcandre.lureau@redhat.com>
The property doesn't make much sense for a vhost-user device.
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost-user-fs.c | 1 -
include/hw/virtio/vhost-user-fs.h | 1 -
2 files changed, 2 deletions(-)
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index f0df7f4..ca0b7fc 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -263,7 +263,6 @@ static Property vuf_properties[] = {
DEFINE_PROP_UINT16("num-request-queues", VHostUserFS,
conf.num_request_queues, 1),
DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128),
- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
index 539885b..9ff1bdb 100644
--- a/include/hw/virtio/vhost-user-fs.h
+++ b/include/hw/virtio/vhost-user-fs.h
@@ -28,7 +28,6 @@ typedef struct {
char *tag;
uint16_t num_request_queues;
uint16_t queue_size;
- char *vhostfd;
} VHostUserFSConf;
typedef struct {
--
1.8.3.1

View File

@ -0,0 +1,60 @@
From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:36 +0100
Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-2-dgilbert@redhat.com>
Patchwork-id: 93455
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
The following MSI-X vectors are required:
* VIRTIO Configuration Change
* hiprio virtqueue
* requests virtqueues
Fix the calculation to reserve enough MSI-X vectors. Otherwise guest
drivers fall back to a sub-optimal configuration where all virtqueues
share a single vector.
This change does not break live migration compatibility since
vhost-user-fs-pci devices are not migratable yet.
Reported-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20191209110759.35227-1-stefanha@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/virtio/vhost-user-fs-pci.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c
index 933a3f2..e3a649d 100644
--- a/hw/virtio/vhost-user-fs-pci.c
+++ b/hw/virtio/vhost-user-fs-pci.c
@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
DeviceState *vdev = DEVICE(&dev->vdev);
if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1;
+ /* Also reserve config change and hiprio queue vectors */
+ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2;
}
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
--
1.8.3.1

View File

@ -0,0 +1,52 @@
From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:11 +0100
Subject: [PATCH 040/116] virtiofs: Add maintainers entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-37-dgilbert@redhat.com>
Patchwork-id: 93491
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
MAINTAINERS | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 5e5e3e5..d1b3e26 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
+virtiofs
+M: Dr. David Alan Gilbert <dgilbert@redhat.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
+S: Supported
+F: tools/virtiofsd/*
+F: hw/virtio/vhost-user-fs*
+F: include/hw/virtio/vhost-user-fs.h
+
virtio-input
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
--
1.8.3.1

View File

@ -0,0 +1,86 @@
From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:38 +0100
Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG
level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-64-dgilbert@redhat.com>
Patchwork-id: 93514
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
virtiofsd runs several threads, so the debug option produces a lot of log
output. It would be useful for debugging if we could identify the specific
thread from the log.
Add the thread ID, obtained via gettid(), to the log when the
FUSE_LOG_DEBUG level is enabled, so that we can grep for a specific thread.
The log then looks like this:
]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto
...
[ID: 00000097] unique: 12696, success, outsize: 120
[ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120
[ID: 00000003] fv_queue_thread: Got queue event on Queue 1
[ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80
[ID: 00000003] fv_queue_thread: Waiting for Queue 1 event
[ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0
[ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014
[ID: 00000071] lo_read(ino=2, size=65536, off=131072)
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
added rework as suggested by Daniel P. Berrangé during review
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index ff6910f..f08324f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -43,6 +43,7 @@
#include <cap-ng.h>
#include <dirent.h>
#include <errno.h>
+#include <glib.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
+ g_autofree char *localfmt = NULL;
+
if (current_log_level < level) {
return;
}
+ if (current_log_level == FUSE_LOG_DEBUG) {
+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt);
+ fmt = localfmt;
+ }
+
if (use_syslog) {
int priority = LOG_ERR;
switch (level) {
--
1.8.3.1

View File

@ -0,0 +1,106 @@
From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:05 +0100
Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-31-dgilbert@redhat.com>
Patchwork-id: 93482
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Wire up the building of the virtiofsd in tools.
virtiofsd relies on Linux-specific system calls and seccomp. Anyone
wishing to port it to other host operating systems should do so
carefully and without reducing security.
Only allow building on Linux hosts.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
Makefile | 10 ++++++++++
Makefile.objs | 1 +
tools/virtiofsd/Makefile.objs | 9 +++++++++
3 files changed, 20 insertions(+)
create mode 100644 tools/virtiofsd/Makefile.objs
diff --git a/Makefile b/Makefile
index 4254950..1526775 100644
--- a/Makefile
+++ b/Makefile
@@ -330,6 +330,10 @@ endif
endif
endif
+ifdef CONFIG_LINUX
+HELPERS-y += virtiofsd$(EXESUF)
+endif
+
# Sphinx does not allow building manuals into the same directory as
# the source files, so if we're doing an in-tree QEMU build we must
# build the manuals into a subdirectory (and then install them from
@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \
elf2dmp-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
+ virtiofsd-obj-y \
rdmacm-mux-obj-y \
libvhost-user-obj-y \
vhost-user-scsi-obj-y \
@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
+ifdef CONFIG_LINUX # relies on Linux-specific syscalls
+virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
+ $(call LINK, $^)
+endif
+
vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
diff --git a/Makefile.objs b/Makefile.objs
index fcf63e1..1a8f288 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/
rdmacm-mux-obj-y = contrib/rdmacm-mux/
vhost-user-input-obj-y = contrib/vhost-user-input/
vhost-user-gpu-obj-y = contrib/vhost-user-gpu/
+virtiofsd-obj-y = tools/virtiofsd/
######################################################################
trace-events-subdirs =
diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
new file mode 100644
index 0000000..45a8075
--- /dev/null
+++ b/tools/virtiofsd/Makefile.objs
@@ -0,0 +1,9 @@
+virtiofsd-obj-y = buffer.o \
+ fuse_opt.o \
+ fuse_log.o \
+ fuse_lowlevel.o \
+ fuse_signals.o \
+ fuse_virtio.o \
+ helper.o \
+ passthrough_ll.o
+
--
1.8.3.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,105 @@
From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:58 +0100
Subject: [PATCH 027/116] virtiofsd: Add main virtio loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-24-dgilbert@redhat.com>
Patchwork-id: 93475
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Processes incoming requests on the vhost-user fd.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++---
1 file changed, 39 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 2ae3c76..1928a20 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -11,12 +11,14 @@
* See the file COPYING.LIB
*/
+#include "fuse_virtio.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
-#include "fuse_virtio.h"
+#include <assert.h>
+#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = {
.queue_is_processed_in_order = fv_queue_order,
};
+/*
+ * Main loop; this mostly deals with events on the vhost-user
+ * socket itself, and not actual fuse data.
+ */
int virtio_loop(struct fuse_session *se)
{
fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
- while (1) {
- /* TODO: Add stuffing */
+ while (!fuse_session_exited(se)) {
+ struct pollfd pf[1];
+ pf[0].fd = se->vu_socketfd;
+ pf[0].events = POLLIN;
+ pf[0].revents = 0;
+
+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__);
+ int poll_res = ppoll(pf, 1, NULL, NULL);
+
+ if (poll_res == -1) {
+ if (errno == EINTR) {
+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
+ __func__);
+ continue;
+ }
+ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n");
+ break;
+ }
+ assert(poll_res == 1);
+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__,
+ pf[0].revents);
+ break;
+ }
+ assert(pf[0].revents & POLLIN);
+ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
+ if (!vu_dispatch(&se->virtio_dev->dev)) {
+ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
+ break;
+ }
}
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
+
+ return 0;
}
int virtio_session_mount(struct fuse_session *se)
--
1.8.3.1

View File

@ -0,0 +1,103 @@
From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:54 +0100
Subject: [PATCH 023/116] virtiofsd: Add options for virtio
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-20-dgilbert@redhat.com>
Patchwork-id: 93473
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Add options to specify parameters for virtio-fs paths, e.g.
./virtiofsd --socket-path=/tmp/vhostqemu
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 1 +
tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++---
tools/virtiofsd/helper.c | 14 +++++++-------
3 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index bae0699..26b1a7d 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -63,6 +63,7 @@ struct fuse_session {
struct fuse_notify_req notify_list;
size_t bufsize;
int error;
+ char *vu_socket_path;
};
struct fuse_chan {
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 8552cfb..17e8718 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2115,8 +2115,11 @@ reply_err:
}
static const struct fuse_opt fuse_ll_opts[] = {
- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1),
- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1),
+ LL_OPTION("debug", debug, 1),
+ LL_OPTION("-d", debug, 1),
+ LL_OPTION("--debug", debug, 1),
+ LL_OPTION("allow_root", deny_others, 1),
+ LL_OPTION("--socket-path=%s", vu_socket_path, 0),
FUSE_OPT_END
};
@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void)
* These are not all options, but the ones that are
* potentially of interest to an end-user
*/
- printf(" -o allow_root allow access by root\n");
+ printf(
+ " -o allow_root allow access by root\n"
+ " --socket-path=PATH path for the vhost-user socket\n");
}
void fuse_session_destroy(struct fuse_session *se)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 9333691..676032e 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = {
void fuse_cmdline_help(void)
{
- printf(
- " -h --help print help\n"
- " -V --version print version\n"
- " -d -o debug enable debug output (implies -f)\n"
- " -f foreground operation\n"
- " -o max_idle_threads the maximum number of idle worker threads\n"
- " allowed (default: 10)\n");
+ printf(" -h --help print help\n"
+ " -V --version print version\n"
+ " -d -o debug enable debug output (implies -f)\n"
+ " -f foreground operation\n"
+ " -o max_idle_threads the maximum number of idle worker "
+ "threads\n"
+ " allowed (default: 10)\n");
}
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,73 @@
From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:39 +0100
Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with
FUSE_LOG_DEBUG level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-65-dgilbert@redhat.com>
Patchwork-id: 93517
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
virtiofsd runs several threads, so the debug option produces a lot of log
output. It would be useful for debugging if we could see a timestamp.
Add a nanosecond timestamp, obtained from get_clock(), to the log when the
FUSE_LOG_DEBUG level is enabled and the syslog option isn't set.
The log then looks like this:
# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto
...
[5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9
[5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event
[5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index f08324f..98114a3 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -36,6 +36,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/timer.h"
#include "fuse_virtio.h"
#include "fuse_log.h"
#include "fuse_lowlevel.h"
@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
}
if (current_log_level == FUSE_LOG_DEBUG) {
- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt);
+ if (!use_syslog) {
+ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
+ get_clock(), syscall(__NR_gettid), fmt);
+ } else {
+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
+ fmt);
+ }
fmt = localfmt;
}
--
1.8.3.1

View File

@ -0,0 +1,85 @@
From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:53 +0100
Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-79-dgilbert@redhat.com>
Patchwork-id: 93532
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Clear out our inodes and fds on a 'destroy', so that we get rid
of them if the guest is rebooted.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b176a31..9ed77a1 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
}
}
+static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
+{
+ struct lo_inode *inode = value;
+ struct lo_data *lo = user_data;
+
+ inode->refcount = 0;
+ lo_map_remove(&lo->ino_map, inode->fuse_ino);
+ close(inode->fd);
+
+ return TRUE;
+}
+
+static void unref_all_inodes(struct lo_data *lo)
+{
+ pthread_mutex_lock(&lo->mutex);
+ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo);
+ pthread_mutex_unlock(&lo->mutex);
+}
+
static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
struct lo_data *lo = lo_data(req);
@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
}
}
+static void lo_destroy(void *userdata)
+{
+ struct lo_data *lo = (struct lo_data *)userdata;
+ unref_all_inodes(lo);
+}
+
static struct fuse_lowlevel_ops lo_oper = {
.init = lo_init,
.lookup = lo_lookup,
@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = {
.copy_file_range = lo_copy_file_range,
#endif
.lseek = lo_lseek,
+ .destroy = lo_destroy,
};
/* Print vhost-user.json backend program capabilities */
--
1.8.3.1

View File

@ -0,0 +1,112 @@
From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:23 +0100
Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex
lock itself
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-109-dgilbert@redhat.com>
Patchwork-id: 93563
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
lo_destroy was relying on some implicit knowledge of the locking;
we can avoid this if we create an unref_inode that doesn't take
the lock and then grab it for the whole of the lo_destroy.
Suggested-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index eb001b9..fc15d61 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
lo_inode_put(lo, &inode);
}
-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
- uint64_t n)
+/* To be called with lo->mutex held */
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
{
if (!inode) {
return;
}
- pthread_mutex_lock(&lo->mutex);
assert(inode->nlookup >= n);
inode->nlookup -= n;
if (!inode->nlookup) {
@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
}
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
- pthread_mutex_unlock(&lo->mutex);
/* Drop our refcount from lo_do_lookup() */
lo_inode_put(lo, &inode);
- } else {
- pthread_mutex_unlock(&lo->mutex);
}
}
+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
+ uint64_t n)
+{
+ if (!inode) {
+ return;
+ }
+
+ pthread_mutex_lock(&lo->mutex);
+ unref_inode(lo, inode, n);
+ pthread_mutex_unlock(&lo->mutex);
+}
+
static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
struct lo_data *lo = lo_data(req);
@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata)
{
struct lo_data *lo = (struct lo_data *)userdata;
- /*
- * Normally lo->mutex must be taken when traversing lo->inodes but
- * lo_destroy() is a serialized request so no races are possible here.
- *
- * In addition, we cannot acquire lo->mutex since unref_inode() takes it
- * too and this would result in a recursive lock.
- */
+ pthread_mutex_lock(&lo->mutex);
while (true) {
GHashTableIter iter;
gpointer key, value;
@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata)
}
struct lo_inode *inode = value;
- unref_inode_lolocked(lo, inode, inode->nlookup);
+ unref_inode(lo, inode, inode->nlookup);
}
+ pthread_mutex_unlock(&lo->mutex);
}
static struct fuse_lowlevel_ops lo_oper = {
--
1.8.3.1

View File

@ -0,0 +1,176 @@
From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:33 +0100
Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-59-dgilbert@redhat.com>
Patchwork-id: 93513
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
If the client requested killing the setuid/setgid bits on the file being
written, drop the CAP_FSETID capability so that the setuid/setgid bits are
cleared automatically upon write.
pjdfstest chown/12.t needs this.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
dgilbert: reworked for libcap-ng
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++
1 file changed, 105 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 97e7c75..d53cb1e 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -201,6 +201,91 @@ static int load_capng(void)
return 0;
}
+/*
+ * Helpers for dropping and regaining effective capabilities. Returns 0
+ * on success, error otherwise
+ */
+static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
+{
+ int cap, ret;
+
+ cap = capng_name_to_capability(cap_name);
+ if (cap < 0) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
+ cap_name, strerror(errno));
+ goto out;
+ }
+
+ if (load_capng()) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
+ goto out;
+ }
+
+ /* We dont have this capability in effective set already. */
+ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n");
+ goto out;
+ }
+
+ if (capng_apply(CAPNG_SELECT_CAPS)) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n");
+ goto out;
+ }
+
+ ret = 0;
+ if (cap_dropped) {
+ *cap_dropped = true;
+ }
+
+out:
+ return ret;
+}
+
+static int gain_effective_cap(const char *cap_name)
+{
+ int cap;
+ int ret = 0;
+
+ cap = capng_name_to_capability(cap_name);
+ if (cap < 0) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
+ cap_name, strerror(errno));
+ goto out;
+ }
+
+ if (load_capng()) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
+ goto out;
+ }
+
+ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n");
+ goto out;
+ }
+
+ if (capng_apply(CAPNG_SELECT_CAPS)) {
+ ret = errno;
+ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n");
+ goto out;
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
static void lo_map_init(struct lo_map *map)
{
map->elems = NULL;
@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
(void)ino;
ssize_t res;
struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
+ bool cap_fsetid_dropped = false;
out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
out_buf.buf[0].fd = lo_fi_fd(req, fi);
@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
out_buf.buf[0].size, (unsigned long)off);
}
+ /*
+ * If kill_priv is set, drop CAP_FSETID which should lead to kernel
+ * clearing setuid/setgid on file.
+ */
+ if (fi->kill_priv) {
+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
+ if (res != 0) {
+ fuse_reply_err(req, res);
+ return;
+ }
+ }
+
res = fuse_buf_copy(&out_buf, in_buf);
if (res < 0) {
fuse_reply_err(req, -res);
} else {
fuse_reply_write(req, (size_t)res);
}
+
+ if (cap_fsetid_dropped) {
+ res = gain_effective_cap("FSETID");
+ if (res) {
+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
+ }
+ }
}
static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
--
1.8.3.1

View File

@ -0,0 +1,240 @@
From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:06 +0100
Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-32-dgilbert@redhat.com>
Patchwork-id: 93480
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Readv the data straight into the guest's buffer.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
With fix by:
Signed-off-by: Eryu Guan <eguan@linux.alibaba.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 5 ++
tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++
tools/virtiofsd/fuse_virtio.h | 4 +
3 files changed, 171 insertions(+)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 380d93b..4f4684d 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se,
return fuse_send_msg(se, ch, iov, iov_count);
}
+ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 &&
+ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) {
+ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len);
+ }
+
abort(); /* Will have taken vhost path */
return 0;
}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index f1adeb6..7e2711b 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -230,6 +230,168 @@ err:
return ret;
}
+/*
+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer
+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK
+ * We need send the iov and then the buffer.
+ * Return 0 on success
+ */
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
+ struct iovec *iov, int count, struct fuse_bufvec *buf,
+ size_t len)
+{
+ int ret = 0;
+ VuVirtqElement *elem;
+ VuVirtq *q;
+
+ assert(count >= 1);
+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
+
+ struct fuse_out_header *out = iov[0].iov_base;
+ /* TODO: Endianness! */
+
+ size_t iov_len = iov_size(iov, count);
+ size_t tosend_len = iov_len + len;
+
+ out->len = tosend_len;
+
+ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__,
+ count, len, iov_len);
+
+ /* unique == 0 is notification which we don't support */
+ assert(out->unique);
+
+ /* For virtio we always have ch */
+ assert(ch);
+ assert(!ch->qi->reply_sent);
+ elem = ch->qi->qe;
+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
+
+ /* The 'in' part of the elem is to qemu */
+ unsigned int in_num = elem->in_num;
+ struct iovec *in_sg = elem->in_sg;
+ size_t in_len = iov_size(in_sg, in_num);
+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
+ __func__, elem->index, in_num, in_len);
+
+ /*
+ * The elem should have room for a 'fuse_out_header' (out from fuse)
+ * plus the data based on the len in the header.
+ */
+ if (in_len < sizeof(struct fuse_out_header)) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
+ __func__, elem->index);
+ ret = E2BIG;
+ goto err;
+ }
+ if (in_len < tosend_len) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
+ __func__, elem->index, tosend_len);
+ ret = E2BIG;
+ goto err;
+ }
+
+ /* TODO: Limit to 'len' */
+
+ /* First copy the header data from iov->in_sg */
+ copy_iov(iov, count, in_sg, in_num, iov_len);
+
+ /*
+ * Build a copy of the the in_sg iov so we can skip bits in it,
+ * including changing the offsets
+ */
+ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num);
+ assert(in_sg_cpy);
+ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
+ /* These get updated as we skip */
+ struct iovec *in_sg_ptr = in_sg_cpy;
+ int in_sg_cpy_count = in_num;
+
+ /* skip over parts of in_sg that contained the header iov */
+ size_t skip_size = iov_len;
+
+ size_t in_sg_left = 0;
+ do {
+ while (skip_size != 0 && in_sg_cpy_count) {
+ if (skip_size >= in_sg_ptr[0].iov_len) {
+ skip_size -= in_sg_ptr[0].iov_len;
+ in_sg_ptr++;
+ in_sg_cpy_count--;
+ } else {
+ in_sg_ptr[0].iov_len -= skip_size;
+ in_sg_ptr[0].iov_base += skip_size;
+ break;
+ }
+ }
+
+ int i;
+ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) {
+ in_sg_left += in_sg_ptr[i].iov_len;
+ }
+ fuse_log(FUSE_LOG_DEBUG,
+ "%s: after skip skip_size=%zd in_sg_cpy_count=%d "
+ "in_sg_left=%zd\n",
+ __func__, skip_size, in_sg_cpy_count, in_sg_left);
+ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count,
+ buf->buf[0].pos);
+
+ if (ret == -1) {
+ ret = errno;
+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n",
+ __func__, len);
+ free(in_sg_cpy);
+ goto err;
+ }
+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__,
+ ret, len);
+ if (ret < len && ret) {
+ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__);
+ /* Skip over this much next time around */
+ skip_size = ret;
+ buf->buf[0].pos += ret;
+ len -= ret;
+
+ /* Lets do another read */
+ continue;
+ }
+ if (!ret) {
+ /* EOF case? */
+ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__,
+ in_sg_left);
+ break;
+ }
+ if (ret != len) {
+ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__);
+ ret = EIO;
+ free(in_sg_cpy);
+ goto err;
+ }
+ in_sg_left -= ret;
+ len -= ret;
+ } while (in_sg_left);
+ free(in_sg_cpy);
+
+ /* Need to fix out->len on EOF */
+ if (len) {
+ struct fuse_out_header *out_sg = in_sg[0].iov_base;
+
+ tosend_len -= len;
+ out_sg->len = tosend_len;
+ }
+
+ ret = 0;
+
+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
+ vu_queue_notify(&se->virtio_dev->dev, q);
+
+err:
+ if (ret == 0) {
+ ch->qi->reply_sent = true;
+ }
+
+ return ret;
+}
+
/* Thread function for individual queues, created when a queue is 'started' */
static void *fv_queue_thread(void *opaque)
{
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
index 135a148..cc676b9 100644
--- a/tools/virtiofsd/fuse_virtio.h
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se);
int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int count);
+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
+ struct iovec *iov, int count,
+ struct fuse_bufvec *buf, size_t len);
+
#endif
--
1.8.3.1

View File

@ -0,0 +1,164 @@
From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:50 +0100
Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU
builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-16-dgilbert@redhat.com>
Patchwork-id: 93470
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
All of the fuse files include config.h and define _GNU_SOURCE,
neither of which we have under our build - remove them.
Fix up the path to the kernel's fuse.h in QEMU's world.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/buffer.c | 4 +---
tools/virtiofsd/fuse_i.h | 3 +++
tools/virtiofsd/fuse_log.c | 1 +
tools/virtiofsd/fuse_lowlevel.c | 6 ++----
tools/virtiofsd/fuse_opt.c | 2 +-
tools/virtiofsd/fuse_signals.c | 2 +-
tools/virtiofsd/helper.c | 1 +
tools/virtiofsd/passthrough_ll.c | 8 ++------
8 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 4d507f3..772efa9 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -9,9 +9,7 @@
* See the file COPYING.LIB
*/
-#define _GNU_SOURCE
-
-#include "config.h"
+#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include <assert.h>
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index e63cb58..bae0699 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -6,6 +6,9 @@
* See the file COPYING.LIB
*/
+#define FUSE_USE_VERSION 31
+
+
#include "fuse.h"
#include "fuse_lowlevel.h"
diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c
index 11345f9..c301ff6 100644
--- a/tools/virtiofsd/fuse_log.c
+++ b/tools/virtiofsd/fuse_log.c
@@ -8,6 +8,7 @@
* See the file COPYING.LIB
*/
+#include "qemu/osdep.h"
#include "fuse_log.h"
#include <stdarg.h>
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 3da80de..07fb8a6 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -9,11 +9,9 @@
* See the file COPYING.LIB
*/
-#define _GNU_SOURCE
-
-#include "config.h"
+#include "qemu/osdep.h"
#include "fuse_i.h"
-#include "fuse_kernel.h"
+#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c
index edd36f4..2892236 100644
--- a/tools/virtiofsd/fuse_opt.c
+++ b/tools/virtiofsd/fuse_opt.c
@@ -9,8 +9,8 @@
* See the file COPYING.LIB
*/
+#include "qemu/osdep.h"
#include "fuse_opt.h"
-#include "config.h"
#include "fuse_i.h"
#include "fuse_misc.h"
diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c
index 19d6791..dc7c8ac 100644
--- a/tools/virtiofsd/fuse_signals.c
+++ b/tools/virtiofsd/fuse_signals.c
@@ -8,7 +8,7 @@
* See the file COPYING.LIB
*/
-#include "config.h"
+#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index d9227d7..9333691 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -10,6 +10,7 @@
* See the file COPYING.LIB.
*/
+#include "qemu/osdep.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include "fuse_misc.h"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 126a56c..322a889 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -35,15 +35,11 @@
* \include passthrough_ll.c
*/
-#define _GNU_SOURCE
-#define FUSE_USE_VERSION 31
-
-#include "config.h"
-
+#include "qemu/osdep.h"
+#include "fuse_lowlevel.h"
#include <assert.h>
#include <dirent.h>
#include <errno.h>
-#include <fuse_lowlevel.h>
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
--
1.8.3.1

View File

@ -0,0 +1,136 @@
From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:15 +0100
Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in
writeback mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-101-dgilbert@redhat.com>
Patchwork-id: 93556
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
When writeback mode is enabled (-o writeback), O_APPEND handling is
done in the kernel. Therefore virtiofsd clears the O_APPEND flag on open.
Otherwise the O_APPEND flag takes precedence over pwrite() and the written
data may be corrupted.
Currently clearing O_APPEND flag is done in lo_open(), but we also
need the same operation in lo_create(). So, factor out the flag
update operation in lo_open() to update_open_flags() and call it
in both lo_open() and lo_create().
This fixes the failure of xfstest generic/069 in writeback mode
(which tests O_APPEND write data integrity).
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 948cb19..4c61ac5 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
fuse_reply_err(req, 0);
}
+static void update_open_flags(int writeback, struct fuse_file_info *fi)
+{
+ /*
+ * With writeback cache, kernel may send read requests even
+ * when userspace opened write-only
+ */
+ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
+ fi->flags &= ~O_ACCMODE;
+ fi->flags |= O_RDWR;
+ }
+
+ /*
+ * With writeback cache, O_APPEND is handled by the kernel.
+ * This breaks atomicity (since the file may change in the
+ * underlying filesystem, so that the kernel's idea of the
+ * end of the file isn't accurate anymore). In this example,
+ * we just accept that. A more rigorous filesystem may want
+ * to return an error here
+ */
+ if (writeback && (fi->flags & O_APPEND)) {
+ fi->flags &= ~O_APPEND;
+ }
+
+ /*
+ * O_DIRECT in guest should not necessarily mean bypassing page
+ * cache on host as well. If somebody needs that behavior, it
+ * probably should be a configuration knob in daemon.
+ */
+ fi->flags &= ~O_DIRECT;
+}
+
static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
mode_t mode, struct fuse_file_info *fi)
{
@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out;
}
- /*
- * O_DIRECT in guest should not necessarily mean bypassing page
- * cache on host as well. If somebody needs that behavior, it
- * probably should be a configuration knob in daemon.
- */
- fi->flags &= ~O_DIRECT;
+ update_open_flags(lo->writeback, fi);
fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
fi->flags);
- /*
- * With writeback cache, kernel may send read requests even
- * when userspace opened write-only
- */
- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
- fi->flags &= ~O_ACCMODE;
- fi->flags |= O_RDWR;
- }
-
- /*
- * With writeback cache, O_APPEND is handled by the kernel.
- * This breaks atomicity (since the file may change in the
- * underlying filesystem, so that the kernel's idea of the
- * end of the file isn't accurate anymore). In this example,
- * we just accept that. A more rigorous filesystem may want
- * to return an error here
- */
- if (lo->writeback && (fi->flags & O_APPEND)) {
- fi->flags &= ~O_APPEND;
- }
-
- /*
- * O_DIRECT in guest should not necessarily mean bypassing page
- * cache on host as well. If somebody needs that behavior, it
- * probably should be a configuration knob in daemon.
- */
- fi->flags &= ~O_DIRECT;
+ update_open_flags(lo->writeback, fi);
sprintf(buf, "%i", lo_fd(req, ino));
fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
--
1.8.3.1

View File

@ -0,0 +1,120 @@
From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:49 +0100
Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-15-dgilbert@redhat.com>
Patchwork-id: 93469
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
QEMU's compiler enables warnings/errors for ignored values
and the (void) trick used in the fuse code isn't enough.
Turn all the return values into a return value on the function.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 5e6f205..d9227d7 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -10,12 +10,10 @@
* See the file COPYING.LIB.
*/
-#include "config.h"
#include "fuse_i.h"
#include "fuse_lowlevel.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
-#include "mount_util.h"
#include <errno.h>
#include <limits.h>
@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
int fuse_daemonize(int foreground)
{
+ int ret = 0, rett;
if (!foreground) {
int nullfd;
int waiter[2];
@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground)
case 0:
break;
default:
- (void)read(waiter[0], &completed, sizeof(completed));
- _exit(0);
+ _exit(read(waiter[0], &completed,
+ sizeof(completed) != sizeof(completed)));
}
if (setsid() == -1) {
@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground)
return -1;
}
- (void)chdir("/");
+ ret = chdir("/");
nullfd = open("/dev/null", O_RDWR, 0);
if (nullfd != -1) {
- (void)dup2(nullfd, 0);
- (void)dup2(nullfd, 1);
- (void)dup2(nullfd, 2);
+ rett = dup2(nullfd, 0);
+ if (!ret) {
+ ret = rett;
+ }
+ rett = dup2(nullfd, 1);
+ if (!ret) {
+ ret = rett;
+ }
+ rett = dup2(nullfd, 2);
+ if (!ret) {
+ ret = rett;
+ }
if (nullfd > 2) {
close(nullfd);
}
@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground)
/* Propagate completion of daemon initialization */
completed = 1;
- (void)write(waiter[1], &completed, sizeof(completed));
+ rett = write(waiter[1], &completed, sizeof(completed));
+ if (!ret) {
+ ret = rett;
+ }
close(waiter[0]);
close(waiter[1]);
} else {
- (void)chdir("/");
+ ret = chdir("/");
}
- return 0;
+ return ret;
}
void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts,
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:41 +0100
Subject: [PATCH 070/116] virtiofsd: Handle hard reboot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-67-dgilbert@redhat.com>
Patchwork-id: 93521
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Handle the sequence:
  mount
  hard reboot (without unmount)
  mount
in which we get another 'init', which FUSE doesn't normally expect.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 7d742b5..65f91da 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se,
goto reply_err;
}
} else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) {
- goto reply_err;
+ if (fuse_lowlevel_is_virtio(se)) {
+ /*
+ * TODO: This is after a hard reboot typically, we need to do
+ * a destroy, but we can't reply to this request yet so
+ * we can't use do_destroy
+ */
+ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__);
+ se->got_destroy = 1;
+ se->got_init = 0;
+ if (se->op.destroy) {
+ se->op.destroy(se->userdata);
+ }
+ } else {
+ goto reply_err;
+ }
}
err = EACCES;
--
1.8.3.1

View File

@ -0,0 +1,53 @@
From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:40 +0100
Subject: [PATCH 069/116] virtiofsd: Handle reinit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-66-dgilbert@redhat.com>
Patchwork-id: 93520
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Allow init->destroy->init for mount->umount->mount
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index a7a1968..7d742b5 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
}
se->got_init = 1;
+ se->got_destroy = 0;
if (se->op.init) {
se->op.init(se->userdata, &se->conn);
}
@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
(void)iter;
se->got_destroy = 1;
+ se->got_init = 0;
if (se->op.destroy) {
se->op.destroy(se->userdata);
}
--
1.8.3.1

View File

@ -0,0 +1,116 @@
From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:04 +0100
Subject: [PATCH 033/116] virtiofsd: Keep track of replies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-30-dgilbert@redhat.com>
Patchwork-id: 93481
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Keep track of whether we sent a reply to a request; this is a bit
paranoid but it means:
a) We should always recycle an element even if there was an error
in the request
b) Never try and send two replies on one queue element
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 05d0e29..f1adeb6 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -44,6 +44,7 @@ struct fv_QueueInfo {
/* The element for the command currently being processed */
VuVirtqElement *qe;
+ bool reply_sent;
};
/*
@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
{
VuVirtqElement *elem;
VuVirtq *q;
+ int ret = 0;
assert(count >= 1);
assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
assert(out->unique);
/* For virtio we always have ch */
assert(ch);
+ assert(!ch->qi->reply_sent);
elem = ch->qi->qe;
q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
@@ -208,19 +211,23 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
if (in_len < sizeof(struct fuse_out_header)) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
__func__, elem->index);
- return -E2BIG;
+ ret = -E2BIG;
+ goto err;
}
if (in_len < tosend_len) {
fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
__func__, elem->index, tosend_len);
- return -E2BIG;
+ ret = -E2BIG;
+ goto err;
}
copy_iov(iov, count, in_sg, in_num, tosend_len);
vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
vu_queue_notify(&se->virtio_dev->dev, q);
+ ch->qi->reply_sent = true;
- return 0;
+err:
+ return ret;
}
/* Thread function for individual queues, created when a queue is 'started' */
@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque)
break;
}
+ qi->qe = elem;
+ qi->reply_sent = false;
+
if (!fbuf.mem) {
fbuf.mem = malloc(se->bufsize);
assert(fbuf.mem);
@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque)
/* TODO: Add checks for fuse_session_exited */
fuse_session_process_buf_int(se, &fbuf, &ch);
+ if (!qi->reply_sent) {
+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
+ __func__, elem->index);
+ /* I think we've still got to recycle the element */
+ vu_queue_push(dev, q, elem, 0);
+ vu_queue_notify(dev, q);
+ }
qi->qe = NULL;
free(elem);
elem = NULL;
--
1.8.3.1

View File

@ -0,0 +1,143 @@
From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:42 +0100
Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-68-dgilbert@redhat.com>
Patchwork-id: 93522
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Kill the threads we've started when the queues get stopped.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
With improvements by:
Signed-off-by: Eryu Guan <eguan@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------
1 file changed, 44 insertions(+), 7 deletions(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 872968f..7a8774a 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -41,6 +41,7 @@ struct fv_QueueInfo {
/* Our queue index, corresponds to array position */
int qidx;
int kick_fd;
+ int kill_fd; /* For killing the thread */
/* The element for the command currently being processed */
VuVirtqElement *qe;
@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque)
fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
qi->qidx, qi->kick_fd);
while (1) {
- struct pollfd pf[1];
+ struct pollfd pf[2];
pf[0].fd = qi->kick_fd;
pf[0].events = POLLIN;
pf[0].revents = 0;
+ pf[1].fd = qi->kill_fd;
+ pf[1].events = POLLIN;
+ pf[1].revents = 0;
fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
qi->qidx);
- int poll_res = ppoll(pf, 1, NULL, NULL);
+ int poll_res = ppoll(pf, 2, NULL, NULL);
if (poll_res == -1) {
if (errno == EINTR) {
@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque)
fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
break;
}
- assert(poll_res == 1);
+ assert(poll_res >= 1);
if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
__func__, pf[0].revents, qi->qidx);
break;
}
+ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ fuse_log(FUSE_LOG_ERR,
+ "%s: Unexpected poll revents %x Queue %d killfd\n",
+ __func__, pf[1].revents, qi->qidx);
+ break;
+ }
+ if (pf[1].revents) {
+ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n",
+ __func__, qi->qidx);
+ break;
+ }
assert(pf[0].revents & POLLIN);
fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
qi->qidx);
@@ -589,6 +604,28 @@ out:
return NULL;
}
+static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx)
+{
+ int ret;
+ struct fv_QueueInfo *ourqi;
+
+ assert(qidx < vud->nqueues);
+ ourqi = vud->qi[qidx];
+
+ /* Kill the thread */
+ if (eventfd_write(ourqi->kill_fd, 1)) {
+ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n",
+ qidx, strerror(errno));
+ }
+ ret = pthread_join(ourqi->thread, NULL);
+ if (ret) {
+ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n",
+ __func__, qidx, ret);
+ }
+ close(ourqi->kill_fd);
+ ourqi->kick_fd = -1;
+}
+
/* Callback from libvhost-user on start or stop of a queue */
static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
{
@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
}
ourqi = vud->qi[qidx];
ourqi->kick_fd = dev->vq[qidx].kick_fd;
+
+ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE);
+ assert(ourqi->kill_fd != -1);
if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
__func__, qidx);
assert(0);
}
} else {
- /* TODO: Kill the thread */
- assert(qidx < vud->nqueues);
- ourqi = vud->qi[qidx];
- ourqi->kick_fd = -1;
+ fv_queue_cleanup_thread(vud, qidx);
}
}
--
1.8.3.1

View File

@ -0,0 +1,96 @@
From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:53 +0100
Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is
passed in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-19-dgilbert@redhat.com>
Patchwork-id: 93472
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
If caller has not sent file handle in request, then using inode, retrieve
the fd opened using O_PATH and use that to open file again and issue
fsync. This will be needed when dax_flush() calls fsync. At that time
we only have inode information (and not file).
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 6 +++++-
tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++--
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 514d79c..8552cfb 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
fi.fh = arg->fh;
if (req->se->op.fsync) {
- req->se->op.fsync(req, nodeid, datasync, &fi);
+ if (fi.fh == (uint64_t)-1) {
+ req->se->op.fsync(req, nodeid, datasync, NULL);
+ } else {
+ req->se->op.fsync(req, nodeid, datasync, &fi);
+ }
} else {
fuse_reply_err(req, ENOSYS);
}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 6c4da18..26ac870 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
{
int res;
(void)ino;
+ int fd;
+ char *buf;
+
+ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
+ (void *)fi);
+
+ if (!fi) {
+ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino));
+ if (res == -1) {
+ return (void)fuse_reply_err(req, errno);
+ }
+
+ fd = open(buf, O_RDWR);
+ free(buf);
+ if (fd == -1) {
+ return (void)fuse_reply_err(req, errno);
+ }
+ } else {
+ fd = fi->fh;
+ }
+
if (datasync) {
- res = fdatasync(fi->fh);
+ res = fdatasync(fd);
} else {
- res = fsync(fi->fh);
+ res = fsync(fd);
+ }
+ if (!fi) {
+ close(fd);
}
fuse_reply_err(req, res == -1 ? errno : 0);
}
--
1.8.3.1

View File

@ -0,0 +1,257 @@
From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:56 +0100
Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-22-dgilbert@redhat.com>
Patchwork-id: 93476
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
When run with vhost-user options we connect to the QEMU instead
via a socket. Start this off by creating the socket.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 7 ++--
tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------
tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++
tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++
4 files changed, 114 insertions(+), 50 deletions(-)
create mode 100644 tools/virtiofsd/fuse_virtio.c
create mode 100644 tools/virtiofsd/fuse_virtio.h
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 26b1a7d..82d6ac7 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -6,9 +6,10 @@
* See the file COPYING.LIB
*/
-#define FUSE_USE_VERSION 31
-
+#ifndef FUSE_I_H
+#define FUSE_I_H
+#define FUSE_USE_VERSION 31
#include "fuse.h"
#include "fuse_lowlevel.h"
@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se,
/* room needed in buffer to accommodate header */
#define FUSE_BUFFER_HEADER_SIZE 0x1000
+
+#endif
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 17e8718..5df124e 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -14,6 +14,7 @@
#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
+#include "fuse_virtio.h"
#include <assert.h>
#include <errno.h>
@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
goto out4;
}
+ if (!se->vu_socket_path) {
+ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n");
+ goto out4;
+ }
+
se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE;
list_init_req(&se->list);
@@ -2224,54 +2230,7 @@ out1:
int fuse_session_mount(struct fuse_session *se)
{
- int fd;
-
- /*
- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos
- * would ensue.
- */
- do {
- fd = open("/dev/null", O_RDWR);
- if (fd > 2) {
- close(fd);
- }
- } while (fd >= 0 && fd <= 2);
-
- /*
- * To allow FUSE daemons to run without privileges, the caller may open
- * /dev/fuse before launching the file system and pass on the file
- * descriptor by specifying /dev/fd/N as the mount point. Note that the
- * parent process takes care of performing the mount in this case.
- */
- fd = fuse_mnt_parse_fuse_fd(mountpoint);
- if (fd != -1) {
- if (fcntl(fd, F_GETFD) == -1) {
- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n",
- fd);
- return -1;
- }
- se->fd = fd;
- return 0;
- }
-
- /* Open channel */
- fd = fuse_kern_mount(mountpoint, se->mo);
- if (fd == -1) {
- return -1;
- }
- se->fd = fd;
-
- /* Save mountpoint */
- se->mountpoint = strdup(mountpoint);
- if (se->mountpoint == NULL) {
- goto error_out;
- }
-
- return 0;
-
-error_out:
- fuse_kern_unmount(mountpoint, fd);
- return -1;
+ return virtio_session_mount(se);
}
int fuse_session_fd(struct fuse_session *se)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
new file mode 100644
index 0000000..cbef6ff
--- /dev/null
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -0,0 +1,79 @@
+/*
+ * virtio-fs glue for FUSE
+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Dave Gilbert <dgilbert@redhat.com>
+ *
+ * Implements the glue between libfuse and libvhost-user
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#include "fuse_i.h"
+#include "standard-headers/linux/fuse.h"
+#include "fuse_misc.h"
+#include "fuse_opt.h"
+#include "fuse_virtio.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+/* From spec */
+struct virtio_fs_config {
+ char tag[36];
+ uint32_t num_queues;
+};
+
+int virtio_session_mount(struct fuse_session *se)
+{
+ struct sockaddr_un un;
+ mode_t old_umask;
+
+ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
+ fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
+ return -1;
+ }
+
+ se->fd = -1;
+
+ /*
+ * Create the Unix socket to communicate with qemu
+ * based on QEMU's vhost-user-bridge
+ */
+ unlink(se->vu_socket_path);
+ strcpy(un.sun_path, se->vu_socket_path);
+ size_t addr_len = sizeof(un);
+
+ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (listen_sock == -1) {
+ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n");
+ return -1;
+ }
+ un.sun_family = AF_UNIX;
+
+ /*
+ * Unfortunately bind doesn't let you set the mask on the socket,
+ * so set umask to 077 and restore it later.
+ */
+ old_umask = umask(0077);
+ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) {
+ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n");
+ umask(old_umask);
+ return -1;
+ }
+ umask(old_umask);
+
+ if (listen(listen_sock, 1) == -1) {
+ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n");
+ return -1;
+ }
+
+ return -1;
+}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
new file mode 100644
index 0000000..8f2edb6
--- /dev/null
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -0,0 +1,23 @@
+/*
+ * virtio-fs glue for FUSE
+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ * Dave Gilbert <dgilbert@redhat.com>
+ *
+ * Implements the glue between libfuse and libvhost-user
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB
+ */
+
+#ifndef FUSE_VIRTIO_H
+#define FUSE_VIRTIO_H
+
+#include "fuse_i.h"
+
+struct fuse_session;
+
+int virtio_session_mount(struct fuse_session *se);
+
+#endif
--
1.8.3.1

View File

@ -0,0 +1,76 @@
From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:31 +0100
Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-57-dgilbert@redhat.com>
Patchwork-id: 93505
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it
to the filesystem.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_common.h | 6 +++++-
tools/virtiofsd/fuse_lowlevel.c | 4 +++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index f8f6433..686c42c 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -93,8 +93,12 @@ struct fuse_file_info {
*/
unsigned int cache_readdir:1;
+ /* Indicates that suid/sgid bits should be removed upon write */
+ unsigned int kill_priv:1;
+
+
/** Padding. Reserved for future use*/
- unsigned int padding:25;
+ unsigned int padding:24;
unsigned int padding2:32;
/*
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 02e1d83..2d6dc5a 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid,
memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh;
fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
fi.lock_owner = arg->lock_owner;
fi.flags = arg->flags;
@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid,
fi.lock_owner = arg->lock_owner;
fi.flags = arg->flags;
fi.fh = arg->fh;
- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE;
+ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE);
+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV);
if (ibufv->count == 1) {
assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD));
--
1.8.3.1

View File

@ -0,0 +1,140 @@
From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:20 +0100
Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-46-dgilbert@redhat.com>
Patchwork-id: 93497
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Pass the write iov pointing to guest RAM all the way through rather
than copying the data.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 73 insertions(+), 6 deletions(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index fd588a4..872968f 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque)
__func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
while (1) {
+ bool allocated_bufv = false;
+ struct fuse_bufvec bufv;
+ struct fuse_bufvec *pbufv;
+
/*
* An element contains one request and the space to send our
* response They're spread over multiple descriptors in a
@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque)
__func__, elem->index);
assert(0); /* TODO */
}
- copy_from_iov(&fbuf, out_num, out_sg);
- fbuf.size = out_len;
+ /* Copy just the first element and look at it */
+ copy_from_iov(&fbuf, 1, out_sg);
+
+ if (out_num > 2 &&
+ out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+ /*
+ * For a write we don't actually need to copy the
+ * data, we can just do it straight out of guest memory
+ * but we must still copy the headers in case the guest
+ * was nasty and changed them while we were using them.
+ */
+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
+
+ /* copy the fuse_write_in header after the fuse_in_header */
+ fbuf.mem += out_sg->iov_len;
+ copy_from_iov(&fbuf, 1, out_sg + 1);
+ fbuf.mem -= out_sg->iov_len;
+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+ /* Allocate the bufv, with space for the rest of the iov */
+ allocated_bufv = true;
+ pbufv = malloc(sizeof(struct fuse_bufvec) +
+ sizeof(struct fuse_buf) * (out_num - 2));
+ if (!pbufv) {
+ vu_queue_unpop(dev, q, elem, 0);
+ free(elem);
+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+ __func__);
+ goto out;
+ }
+
+ pbufv->count = 1;
+ pbufv->buf[0] = fbuf;
+
+ size_t iovindex, pbufvindex;
+ iovindex = 2; /* 2 headers, separate iovs */
+ pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+ for (; iovindex < out_num; iovindex++, pbufvindex++) {
+ pbufv->count++;
+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+ pbufv->buf[pbufvindex].flags = 0;
+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+ }
+ } else {
+ /* Normal (non fast write) path */
+
+ /* Copy the rest of the buffer */
+ fbuf.mem += out_sg->iov_len;
+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+ fbuf.mem -= out_sg->iov_len;
+ fbuf.size = out_len;
- /* TODO! Endianness of header */
+ /* TODO! Endianness of header */
- /* TODO: Add checks for fuse_session_exited */
- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 };
- fuse_session_process_buf_int(se, &bufv, &ch);
+ /* TODO: Add checks for fuse_session_exited */
+ bufv.buf[0] = fbuf;
+ bufv.count = 1;
+ pbufv = &bufv;
+ }
+ pbufv->idx = 0;
+ pbufv->off = 0;
+ fuse_session_process_buf_int(se, pbufv, &ch);
+
+ if (allocated_bufv) {
+ free(pbufv);
+ }
if (!qi->reply_sent) {
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque)
elem = NULL;
}
}
+out:
pthread_mutex_destroy(&ch.lock);
free(fbuf.mem);
--
1.8.3.1

View File

@ -0,0 +1,168 @@
From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:19 +0100
Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-45-dgilbert@redhat.com>
Patchwork-id: 93499
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Let fuse_session_process_buf_int take a fuse_bufvec * instead of a
fuse_buf; and then through to do_write_buf - where in the best
case it can pass that straight through to op.write_buf without copying
(other than skipping a header).
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 2 +-
tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++--------------
tools/virtiofsd/fuse_virtio.c | 3 +-
3 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 45995f3..a20854f 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
void fuse_free_req(fuse_req_t req);
void fuse_session_process_buf_int(struct fuse_session *se,
- const struct fuse_buf *buf,
+ struct fuse_bufvec *bufv,
struct fuse_chan *ch);
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 95f4db8..7e10995 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
}
static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg,
- const struct fuse_buf *ibuf)
+ struct fuse_bufvec *ibufv)
{
struct fuse_session *se = req->se;
- struct fuse_bufvec bufv = {
- .buf[0] = *ibuf,
+ struct fuse_bufvec *pbufv = ibufv;
+ struct fuse_bufvec tmpbufv = {
+ .buf[0] = ibufv->buf[0],
.count = 1,
};
struct fuse_write_in *arg = (struct fuse_write_in *)inarg;
@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg,
fi.fh = arg->fh;
fi.writepage = arg->write_flags & FUSE_WRITE_CACHE;
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) {
- bufv.buf[0].mem = PARAM(arg);
- }
-
- bufv.buf[0].size -=
- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in);
- if (bufv.buf[0].size < arg->size) {
- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n");
- fuse_reply_err(req, EIO);
- return;
+ if (ibufv->count == 1) {
+ fi.lock_owner = arg->lock_owner;
+ fi.flags = arg->flags;
+ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) {
+ tmpbufv.buf[0].mem = PARAM(arg);
+ }
+ tmpbufv.buf[0].size -=
+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in);
+ if (tmpbufv.buf[0].size < arg->size) {
+ fuse_log(FUSE_LOG_ERR,
+ "fuse: do_write_buf: buffer size too small\n");
+ fuse_reply_err(req, EIO);
+ return;
+ }
+ tmpbufv.buf[0].size = arg->size;
+ pbufv = &tmpbufv;
+ } else {
+ /*
+ * Input bufv contains the headers in the first element
+ * and the data in the rest, we need to skip that first element
+ */
+ ibufv->buf[0].size = 0;
}
- bufv.buf[0].size = arg->size;
- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi);
+ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi);
}
static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode)
void fuse_session_process_buf(struct fuse_session *se,
const struct fuse_buf *buf)
{
- fuse_session_process_buf_int(se, buf, NULL);
+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 };
+ fuse_session_process_buf_int(se, &bufv, NULL);
}
+/*
+ * Restriction:
+ * bufv is normally a single entry buffer, except for a write
+ * where (if it's in memory) then the bufv may be multiple entries,
+ * where the first entry contains all headers and subsequent entries
+ * contain data
+ * bufv shall not use any offsets etc to make the data anything
+ * other than contiguous starting from 0.
+ */
void fuse_session_process_buf_int(struct fuse_session *se,
- const struct fuse_buf *buf,
+ struct fuse_bufvec *bufv,
struct fuse_chan *ch)
{
+ const struct fuse_buf *buf = bufv->buf;
struct fuse_in_header *in;
const void *inarg;
struct fuse_req *req;
@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se,
inarg = (void *)&in[1];
if (in->opcode == FUSE_WRITE && se->op.write_buf) {
- do_write_buf(req, in->nodeid, inarg, buf);
+ do_write_buf(req, in->nodeid, inarg, bufv);
} else {
fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg);
}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 635f877..fd588a4 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque)
/* TODO! Endianness of header */
/* TODO: Add checks for fuse_session_exited */
- fuse_session_process_buf_int(se, &fbuf, &ch);
+ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 };
+ fuse_session_process_buf_int(se, &bufv, &ch);
if (!qi->reply_sent) {
fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n",
--
1.8.3.1

View File

@ -0,0 +1,97 @@
From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:01 +0100
Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-27-dgilbert@redhat.com>
Patchwork-id: 93483
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
In the queue thread poll the kick_fd we're passed.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 2a94bb3..05e7258 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/eventfd.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err)
exit(EXIT_FAILURE);
}
+/* Thread function for individual queues, created when a queue is 'started' */
static void *fv_queue_thread(void *opaque)
{
struct fv_QueueInfo *qi = opaque;
fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
qi->qidx, qi->kick_fd);
while (1) {
- /* TODO */
+ struct pollfd pf[1];
+ pf[0].fd = qi->kick_fd;
+ pf[0].events = POLLIN;
+ pf[0].revents = 0;
+
+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__,
+ qi->qidx);
+ int poll_res = ppoll(pf, 1, NULL, NULL);
+
+ if (poll_res == -1) {
+ if (errno == EINTR) {
+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n",
+ __func__);
+ continue;
+ }
+ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n");
+ break;
+ }
+ assert(poll_res == 1);
+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n",
+ __func__, pf[0].revents, qi->qidx);
+ break;
+ }
+ assert(pf[0].revents & POLLIN);
+ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__,
+ qi->qidx);
+
+ eventfd_t evalue;
+ if (eventfd_read(qi->kick_fd, &evalue)) {
+ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
+ break;
+ }
+ if (qi->virtio_dev->se->debug) {
+ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__,
+ qi->qidx, (size_t)evalue);
+ }
}
return NULL;
--
1.8.3.1

View File

@ -0,0 +1,144 @@
From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:59 +0100
Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same
vhost_user_socket
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-85-dgilbert@redhat.com>
Patchwork-id: 93541
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Multiple virtiofsd instances can be started even if they use the same vhost_user_socket path.
]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share &
[1] 244965
virtio_session_mount: Waiting for vhost-user socket connection...
]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share &
[2] 244966
virtio_session_mount: Waiting for vhost-user socket connection...
]#
This is confusing for the user and may be the cause of unexpected
problems, so it is better to prevent multiple instances from running.
Create a regular file under the localstatedir directory to claim exclusive
use of the vhost_user_socket. Use qemu_write_pidfile() to create and lock
the file, because that API already performs sanity checks and file locking.
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Applied fixes from Stefan's review and moved osdep include
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 1 +
tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 440508a..aac282f 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -18,6 +18,7 @@
#include <assert.h>
#include <errno.h>
+#include <glib.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index e7bd772..b7948de 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -13,11 +13,12 @@
#include "qemu/osdep.h"
#include "qemu/iov.h"
-#include "fuse_virtio.h"
+#include "qapi/error.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
#include "fuse_misc.h"
#include "fuse_opt.h"
+#include "fuse_virtio.h"
#include <assert.h>
#include <errno.h>
@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se)
return 0;
}
+static void strreplace(char *s, char old, char new)
+{
+ for (; *s; ++s) {
+ if (*s == old) {
+ *s = new;
+ }
+ }
+}
+
+static bool fv_socket_lock(struct fuse_session *se)
+{
+ g_autofree gchar *sk_name = NULL;
+ g_autofree gchar *pidfile = NULL;
+ g_autofree gchar *dir = NULL;
+ Error *local_err = NULL;
+
+ dir = qemu_get_local_state_pathname("run/virtiofsd");
+
+ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) {
+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s",
+ __func__, dir, strerror(errno));
+ return false;
+ }
+
+ sk_name = g_strdup(se->vu_socket_path);
+ strreplace(sk_name, '/', '.');
+ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name);
+
+ if (!qemu_write_pidfile(pidfile, &local_err)) {
+ error_report_err(local_err);
+ return false;
+ }
+
+ return true;
+}
+
static int fv_create_listen_socket(struct fuse_session *se)
{
struct sockaddr_un un;
@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se)
return -1;
}
+ if (!strlen(se->vu_socket_path)) {
+ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n");
+ return -1;
+ }
+
+ /* Check the vu_socket_path is already used */
+ if (!fv_socket_lock(se)) {
+ return -1;
+ }
+
/*
* Create the Unix socket to communicate with qemu
* based on QEMU's vhost-user-bridge
--
1.8.3.1

View File

@ -0,0 +1,945 @@
From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:40 +0100
Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-6-dgilbert@redhat.com>
Patchwork-id: 93460
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Update scripts/update-linux-headers.sh to add fuse.h and
use it to pull in fuse.h from the kernel; from v5.5-rc1
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++
scripts/update-linux-headers.sh | 1 +
2 files changed, 892 insertions(+)
create mode 100644 include/standard-headers/linux/fuse.h
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
new file mode 100644
index 0000000..f4df0a4
--- /dev/null
+++ b/include/standard-headers/linux/fuse.h
@@ -0,0 +1,891 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
+/*
+ This file defines the kernel interface of FUSE
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+
+ This -- and only this -- header file may also be distributed under
+ the terms of the BSD Licence as follows:
+
+ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+*/
+
+/*
+ * This file defines the kernel interface of FUSE
+ *
+ * Protocol changelog:
+ *
+ * 7.1:
+ * - add the following messages:
+ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK,
+ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE,
+ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR,
+ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR,
+ * FUSE_RELEASEDIR
+ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels
+ *
+ * 7.2:
+ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags
+ * - add FUSE_FSYNCDIR message
+ *
+ * 7.3:
+ * - add FUSE_ACCESS message
+ * - add FUSE_CREATE message
+ * - add filehandle to fuse_setattr_in
+ *
+ * 7.4:
+ * - add frsize to fuse_kstatfs
+ * - clean up request size limit checking
+ *
+ * 7.5:
+ * - add flags and max_write to fuse_init_out
+ *
+ * 7.6:
+ * - add max_readahead to fuse_init_in and fuse_init_out
+ *
+ * 7.7:
+ * - add FUSE_INTERRUPT message
+ * - add POSIX file lock support
+ *
+ * 7.8:
+ * - add lock_owner and flags fields to fuse_release_in
+ * - add FUSE_BMAP message
+ * - add FUSE_DESTROY message
+ *
+ * 7.9:
+ * - new fuse_getattr_in input argument of GETATTR
+ * - add lk_flags in fuse_lk_in
+ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in
+ * - add blksize field to fuse_attr
+ * - add file flags field to fuse_read_in and fuse_write_in
+ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in
+ *
+ * 7.10
+ * - add nonseekable open flag
+ *
+ * 7.11
+ * - add IOCTL message
+ * - add unsolicited notification support
+ * - add POLL message and NOTIFY_POLL notification
+ *
+ * 7.12
+ * - add umask flag to input argument of create, mknod and mkdir
+ * - add notification messages for invalidation of inodes and
+ * directory entries
+ *
+ * 7.13
+ * - make max number of background requests and congestion threshold
+ * tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ * - add FUSE_WRITEBACK_CACHE
+ * - add time_gran to fuse_init_out
+ * - add reserved space to fuse_init_out
+ * - add FATTR_CTIME
+ * - add ctime and ctimensec to fuse_setattr_in
+ * - add FUSE_RENAME2 request
+ * - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ * 7.24
+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ * 7.25
+ * - add FUSE_PARALLEL_DIROPS
+ *
+ * 7.26
+ * - add FUSE_HANDLE_KILLPRIV
+ * - add FUSE_POSIX_ACL
+ *
+ * 7.27
+ * - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FUSE_COPY_FILE_RANGE
+ * - add FOPEN_CACHE_DIR
+ * - add FUSE_MAX_PAGES, add max_pages to init_out
+ * - add FUSE_CACHE_SYMLINKS
+ *
+ * 7.29
+ * - add FUSE_NO_OPENDIR_SUPPORT flag
+ *
+ * 7.30
+ * - add FUSE_EXPLICIT_INVAL_DATA
+ * - add FUSE_IOCTL_COMPAT_X32
+ *
+ * 7.31
+ * - add FUSE_WRITE_KILL_PRIV flag
+ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
+ */
+
+#ifndef _LINUX_FUSE_H
+#define _LINUX_FUSE_H
+
+#include <stdint.h>
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
+
+/** Version number of this interface */
+#define FUSE_KERNEL_VERSION 7
+
+/** Minor version number of this interface */
+#define FUSE_KERNEL_MINOR_VERSION 31
+
+/** The node ID of the root inode */
+#define FUSE_ROOT_ID 1
+
+/* Make sure all structures are padded to 64bit boundary, so 32bit
+ userspace works under 64bit kernels */
+
+struct fuse_attr {
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
+};
+
+struct fuse_kstatfs {
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
+};
+
+struct fuse_file_lock {
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
+};
+
+/**
+ * Bitmasks for fuse_setattr_in.valid
+ */
+#define FATTR_MODE (1 << 0)
+#define FATTR_UID (1 << 1)
+#define FATTR_GID (1 << 2)
+#define FATTR_SIZE (1 << 3)
+#define FATTR_ATIME (1 << 4)
+#define FATTR_MTIME (1 << 5)
+#define FATTR_FH (1 << 6)
+#define FATTR_ATIME_NOW (1 << 7)
+#define FATTR_MTIME_NOW (1 << 8)
+#define FATTR_LOCKOWNER (1 << 9)
+#define FATTR_CTIME (1 << 10)
+
+/**
+ * Flags returned by the OPEN request
+ *
+ * FOPEN_DIRECT_IO: bypass page cache for this open file
+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open
+ * FOPEN_NONSEEKABLE: the file is not seekable
+ * FOPEN_CACHE_DIR: allow caching this directory
+ * FOPEN_STREAM: the file is stream-like (no file position at all)
+ */
+#define FOPEN_DIRECT_IO (1 << 0)
+#define FOPEN_KEEP_CACHE (1 << 1)
+#define FOPEN_NONSEEKABLE (1 << 2)
+#define FOPEN_CACHE_DIR (1 << 3)
+#define FOPEN_STREAM (1 << 4)
+
+/**
+ * INIT request/reply flags
+ *
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
+ * FUSE_POSIX_ACL: filesystem supports posix acls
+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
+ * FUSE_CACHE_SYMLINKS: cache READLINK responses
+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid
+ */
+#define FUSE_ASYNC_READ (1 << 0)
+#define FUSE_POSIX_LOCKS (1 << 1)
+#define FUSE_FILE_OPS (1 << 2)
+#define FUSE_ATOMIC_O_TRUNC (1 << 3)
+#define FUSE_EXPORT_SUPPORT (1 << 4)
+#define FUSE_BIG_WRITES (1 << 5)
+#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
+#define FUSE_WRITEBACK_CACHE (1 << 16)
+#define FUSE_NO_OPEN_SUPPORT (1 << 17)
+#define FUSE_PARALLEL_DIROPS (1 << 18)
+#define FUSE_HANDLE_KILLPRIV (1 << 19)
+#define FUSE_POSIX_ACL (1 << 20)
+#define FUSE_ABORT_ERROR (1 << 21)
+#define FUSE_MAX_PAGES (1 << 22)
+#define FUSE_CACHE_SYMLINKS (1 << 23)
+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT (1 << 26)
+
+/**
+ * CUSE INIT request/reply flags
+ *
+ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl
+ */
+#define CUSE_UNRESTRICTED_IOCTL (1 << 0)
+
+/**
+ * Release flags
+ */
+#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
+
+/**
+ * Getattr flags
+ */
+#define FUSE_GETATTR_FH (1 << 0)
+
+/**
+ * Lock flags
+ */
+#define FUSE_LK_FLOCK (1 << 0)
+
+/**
+ * WRITE flags
+ *
+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
+ */
+#define FUSE_WRITE_CACHE (1 << 0)
+#define FUSE_WRITE_LOCKOWNER (1 << 1)
+#define FUSE_WRITE_KILL_PRIV (1 << 2)
+
+/**
+ * Read flags
+ */
+#define FUSE_READ_LOCKOWNER (1 << 1)
+
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine (64bit time_t)
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT (1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
+#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
+#define FUSE_IOCTL_COMPAT_X32 (1 << 5)
+
+#define FUSE_IOCTL_MAX_IOV 256
+
+/**
+ * Poll flags
+ *
+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
+ */
+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
+
+/**
+ * Fsync flags
+ *
+ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata
+ */
+#define FUSE_FSYNC_FDATASYNC (1 << 0)
+
+enum fuse_opcode {
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
+ FUSE_COPY_FILE_RANGE = 47,
+ FUSE_SETUPMAPPING = 48,
+ FUSE_REMOVEMAPPING = 49,
+
+ /* CUSE specific operations */
+ CUSE_INIT = 4096,
+
+ /* Reserved opcodes: helpful to detect structure endian-ness */
+ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */
+ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */
+};
+
+enum fuse_notify_code {
+ FUSE_NOTIFY_POLL = 1,
+ FUSE_NOTIFY_INVAL_INODE = 2,
+ FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
+ FUSE_NOTIFY_CODE_MAX,
+};
+
+/* The read buffer is required to be at least 8k, but may be much larger */
+#define FUSE_MIN_READ_BUFFER 8192
+
+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
+
+struct fuse_entry_out {
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
+ struct fuse_attr attr;
+};
+
+struct fuse_forget_in {
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
+};
+
+struct fuse_getattr_in {
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
+};
+
+#define FUSE_COMPAT_ATTR_OUT_SIZE 96
+
+struct fuse_attr_out {
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
+ struct fuse_attr attr;
+};
+
+#define FUSE_COMPAT_MKNOD_IN_SIZE 8
+
+struct fuse_mknod_in {
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
+};
+
+struct fuse_mkdir_in {
+ uint32_t mode;
+ uint32_t umask;
+};
+
+struct fuse_rename_in {
+ uint64_t newdir;
+};
+
+struct fuse_rename2_in {
+ uint64_t newdir;
+ uint32_t flags;
+ uint32_t padding;
+};
+
+struct fuse_link_in {
+ uint64_t oldnodeid;
+};
+
+struct fuse_setattr_in {
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
+};
+
+struct fuse_open_in {
+ uint32_t flags;
+ uint32_t unused;
+};
+
+struct fuse_create_in {
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
+};
+
+struct fuse_open_out {
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
+};
+
+struct fuse_release_in {
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
+};
+
+struct fuse_flush_in {
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
+};
+
+struct fuse_read_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
+};
+
+#define FUSE_COMPAT_WRITE_IN_SIZE 24
+
+struct fuse_write_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
+};
+
+struct fuse_write_out {
+ uint32_t size;
+ uint32_t padding;
+};
+
+#define FUSE_COMPAT_STATFS_SIZE 48
+
+struct fuse_statfs_out {
+ struct fuse_kstatfs st;
+};
+
+struct fuse_fsync_in {
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
+};
+
+struct fuse_setxattr_in {
+ uint32_t size;
+ uint32_t flags;
+};
+
+struct fuse_getxattr_in {
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_getxattr_out {
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_lk_in {
+ uint64_t fh;
+ uint64_t owner;
+ struct fuse_file_lock lk;
+ uint32_t lk_flags;
+ uint32_t padding;
+};
+
+struct fuse_lk_out {
+ struct fuse_file_lock lk;
+};
+
+struct fuse_access_in {
+ uint32_t mask;
+ uint32_t padding;
+};
+
+struct fuse_init_in {
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+};
+
+#define FUSE_COMPAT_INIT_OUT_SIZE 8
+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
+
+struct fuse_init_out {
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
+ uint32_t time_gran;
+ uint16_t max_pages;
+ uint16_t map_alignment;
+ uint32_t unused[8];
+};
+
+#define CUSE_INIT_INFO_MAX 4096
+
+struct cuse_init_in {
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+};
+
+struct cuse_init_out {
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
+};
+
+struct fuse_interrupt_in {
+ uint64_t unique;
+};
+
+struct fuse_bmap_in {
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
+};
+
+struct fuse_bmap_out {
+ uint64_t block;
+};
+
+struct fuse_ioctl_in {
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
+};
+
+struct fuse_ioctl_out {
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
+};
+
+struct fuse_poll_in {
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
+};
+
+struct fuse_poll_out {
+ uint32_t revents;
+ uint32_t padding;
+};
+
+struct fuse_notify_poll_wakeup_out {
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
+};
+
+struct fuse_in_header {
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
+};
+
+struct fuse_out_header {
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
+};
+
+struct fuse_dirent {
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
+};
+
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+#define FUSE_DIRENT_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
+struct fuse_notify_inval_inode_out {
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
+};
+
+struct fuse_notify_inval_entry_out {
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
+};
+
+/* Device ioctls: */
+#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
+
+struct fuse_lseek_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t whence;
+ uint32_t padding;
+};
+
+struct fuse_lseek_out {
+ uint64_t offset;
+};
+
+struct fuse_copy_file_range_in {
+ uint64_t fh_in;
+ uint64_t off_in;
+ uint64_t nodeid_out;
+ uint64_t fh_out;
+ uint64_t off_out;
+ uint64_t len;
+ uint64_t flags;
+};
+
+#endif /* _LINUX_FUSE_H */
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index f76d773..29c27f4 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux"
mkdir -p "$output/include/standard-headers/linux"
for i in "$tmpdir"/include/linux/*virtio*.h \
"$tmpdir/include/linux/qemu_fw_cfg.h" \
+ "$tmpdir/include/linux/fuse.h" \
"$tmpdir/include/linux/input.h" \
"$tmpdir/include/linux/input-event-codes.h" \
"$tmpdir/include/linux/pci_regs.h" \
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,271 @@
From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:48 +0100
Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-14-dgilbert@redhat.com>
Patchwork-id: 93465
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Xiao Yang <yangx.jy@cn.fujitsu.com>
Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/buffer.c | 7 +++---
tools/virtiofsd/fuse_common.h | 46 +---------------------------------------
tools/virtiofsd/fuse_lowlevel.c | 13 +++++-------
tools/virtiofsd/fuse_lowlevel.h | 35 ++----------------------------
tools/virtiofsd/passthrough_ll.c | 4 ++--
5 files changed, 13 insertions(+), 92 deletions(-)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 5df946c..4d507f3 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off,
static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off,
const struct fuse_buf *src, size_t src_off,
- size_t len, enum fuse_buf_copy_flags flags)
+ size_t len)
{
int src_is_fd = src->flags & FUSE_BUF_IS_FD;
int dst_is_fd = dst->flags & FUSE_BUF_IS_FD;
@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len)
return 1;
}
-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv,
- enum fuse_buf_copy_flags flags)
+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
{
size_t copied = 0;
@@ -249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv,
dst_len = dst->size - dstv->off;
len = min_size(src_len, dst_len);
- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags);
+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len);
if (res < 0) {
if (!copied) {
return res;
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index bd9bf86..0cb33ac 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -605,48 +605,6 @@ enum fuse_buf_flags {
};
/**
- * Buffer copy flags
- */
-enum fuse_buf_copy_flags {
- /**
- * Don't use splice(2)
- *
- * Always fall back to using read and write instead of
- * splice(2) to copy data from one file descriptor to another.
- *
- * If this flag is not set, then only fall back if splice is
- * unavailable.
- */
- FUSE_BUF_NO_SPLICE = (1 << 1),
-
- /**
- * Force splice
- *
- * Always use splice(2) to copy data from one file descriptor
- * to another. If splice is not available, return -EINVAL.
- */
- FUSE_BUF_FORCE_SPLICE = (1 << 2),
-
- /**
- * Try to move data with splice.
- *
- * If splice is used, try to move pages from the source to the
- * destination instead of copying. See documentation of
- * SPLICE_F_MOVE in splice(2) man page.
- */
- FUSE_BUF_SPLICE_MOVE = (1 << 3),
-
- /**
- * Don't block on the pipe when copying data with splice
- *
- * Makes the operations on the pipe non-blocking (if the pipe
- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2)
- * man page.
- */
- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4),
-};
-
-/**
* Single data buffer
*
* Generic data buffer for I/O, extended attributes, etc... Data may
@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv);
*
* @param dst destination buffer vector
* @param src source buffer vector
- * @param flags flags controlling the copy
* @return actual number of bytes copied or -errno on error
*/
-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src,
- enum fuse_buf_copy_flags flags);
+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
/*
* Signal handling
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index eb0ec49..3da80de 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se,
static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
struct iovec *iov, int iov_count,
- struct fuse_bufvec *buf, unsigned int flags)
+ struct fuse_bufvec *buf)
{
size_t len = fuse_buf_size(buf);
- (void)flags;
return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len);
}
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv,
- enum fuse_buf_copy_flags flags)
+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
{
struct iovec iov[2];
struct fuse_out_header out;
@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv,
out.unique = req->unique;
out.error = 0;
- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags);
+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
if (res <= 0) {
fuse_free_req(req);
return res;
@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
}
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
- off_t offset, struct fuse_bufvec *bufv,
- enum fuse_buf_copy_flags flags)
+ off_t offset, struct fuse_bufvec *bufv)
{
struct fuse_out_header out;
struct fuse_notify_store_out outarg;
@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags);
+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv);
if (res > 0) {
res = -res;
}
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index 12a84b4..2fa225d 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size);
/**
* Reply with data copied/moved from buffer(s)
*
- * Zero copy data transfer ("splicing") will be used under
- * the following circumstances:
- *
- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and
- * 2. the kernel supports splicing from the fuse device
- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and
- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE
- * 4. The amount of data that is provided in file-descriptor backed
- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD)
- * is at least twice the page size.
- *
- * In order for SPLICE_F_MOVE to be used, the following additional
- * conditions have to be fulfilled:
- *
- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and
- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in
- fuse_conn_info.capable), and
- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE
- *
- * Note that, if splice is used, the data is actually spliced twice:
- * once into a temporary pipe (to prepend header data), and then again
- * into the kernel. If some of the provided buffers are memory-backed,
- * the data in them is copied in step one and spliced in step two.
- *
- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags
- * are silently ignored.
- *
* Possible requests:
* read, readdir, getxattr, listxattr
*
@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size);
*
* @param req request handle
* @param bufv buffer vector
- * @param flags flags controlling the copy
* @return zero for success, -errno for failure to send reply
*/
-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv,
- enum fuse_buf_copy_flags flags);
+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv);
/**
* Reply with data vector
@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
* @param ino the inode number
* @param offset the starting offset into the file to store to
* @param bufv buffer vector
- * @param flags flags controlling the copy
* @return zero for success, -errno for failure
*/
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
- off_t offset, struct fuse_bufvec *bufv,
- enum fuse_buf_copy_flags flags);
+ off_t offset, struct fuse_bufvec *bufv);
/*
* Utility functions
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9377718..126a56c 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
buf.buf[0].fd = fi->fh;
buf.buf[0].pos = offset;
- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE);
+ fuse_reply_data(req, &buf);
}
static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
out_buf.buf[0].size, (unsigned long)off);
}
- res = fuse_buf_copy(&out_buf, in_buf, 0);
+ res = fuse_buf_copy(&out_buf, in_buf);
if (res < 0) {
fuse_reply_err(req, -res);
} else {
--
1.8.3.1

View File

@ -0,0 +1,72 @@
From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:14 +0100
Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-100-dgilbert@redhat.com>
Patchwork-id: 93553
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
If an application wants to do direct IO and opens a file with O_DIRECT
in guest, that does not necessarily mean that we need to bypass page
cache on host as well. So reset this flag on host.
If somebody needs to bypass page cache on host as well (and it is safe to
do so), we can add a knob in daemon later to control this behavior.
I checked virtio-9p and it also resets the O_DIRECT flag.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index ccbbec1..948cb19 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out;
}
+ /*
+ * O_DIRECT in guest should not necessarily mean bypassing page
+ * cache on host as well. If somebody needs that behavior, it
+ * probably should be a configuration knob in daemon.
+ */
+ fi->flags &= ~O_DIRECT;
+
fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
err = fd == -1 ? errno : 0;
@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
fi->flags &= ~O_APPEND;
}
+ /*
+ * O_DIRECT in guest should not necessarily mean bypassing page
+ * cache on host as well. If somebody needs that behavior, it
+ * probably should be a configuration knob in daemon.
+ */
+ fi->flags &= ~O_DIRECT;
+
sprintf(buf, "%i", lo_fd(req, ino));
fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
if (fd == -1) {
--
1.8.3.1

View File

@ -0,0 +1,199 @@
From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:03 +0100
Subject: [PATCH 032/116] virtiofsd: Send replies to messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-29-dgilbert@redhat.com>
Patchwork-id: 93485
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Route fuse out messages back through the same queue elements
that had the command that triggered the request.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 4 ++
tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++--
tools/virtiofsd/fuse_virtio.h | 4 ++
3 files changed, 111 insertions(+), 4 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index af09fa2..380d93b 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
}
}
+ if (fuse_lowlevel_is_virtio(se)) {
+ return virtio_send_msg(se, ch, iov, count);
+ }
+
abort(); /* virtio should have taken it before here */
return 0;
}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 3841b20..05d0e29 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -41,6 +41,9 @@ struct fv_QueueInfo {
/* Our queue index, corresponds to array position */
int qidx;
int kick_fd;
+
+ /* The element for the command currently being processed */
+ VuVirtqElement *qe;
};
/*
@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
}
}
+/*
+ * Copy from one iov to another, the given number of bytes
+ * The caller must have checked sizes.
+ */
+static void copy_iov(struct iovec *src_iov, int src_count,
+ struct iovec *dst_iov, int dst_count, size_t to_copy)
+{
+ size_t dst_offset = 0;
+ /* Outer loop copies 'src' elements */
+ while (to_copy) {
+ assert(src_count);
+ size_t src_len = src_iov[0].iov_len;
+ size_t src_offset = 0;
+
+ if (src_len > to_copy) {
+ src_len = to_copy;
+ }
+ /* Inner loop copies contents of one 'src' to maybe multiple dst. */
+ while (src_len) {
+ assert(dst_count);
+ size_t dst_len = dst_iov[0].iov_len - dst_offset;
+ if (dst_len > src_len) {
+ dst_len = src_len;
+ }
+
+ memcpy(dst_iov[0].iov_base + dst_offset,
+ src_iov[0].iov_base + src_offset, dst_len);
+ src_len -= dst_len;
+ to_copy -= dst_len;
+ src_offset += dst_len;
+ dst_offset += dst_len;
+
+ assert(dst_offset <= dst_iov[0].iov_len);
+ if (dst_offset == dst_iov[0].iov_len) {
+ dst_offset = 0;
+ dst_iov++;
+ dst_count--;
+ }
+ }
+ src_iov++;
+ src_count--;
+ }
+}
+
+/*
+ * Called back by ll whenever it wants to send a reply/message back
+ * The 1st element of the iov starts with the fuse_out_header
+ * 'unique'==0 means it's a notify message.
+ */
+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
+ struct iovec *iov, int count)
+{
+ VuVirtqElement *elem;
+ VuVirtq *q;
+
+ assert(count >= 1);
+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header));
+
+ struct fuse_out_header *out = iov[0].iov_base;
+ /* TODO: Endianness! */
+
+ size_t tosend_len = iov_size(iov, count);
+
+ /* unique == 0 is notification, which we don't support */
+ assert(out->unique);
+ /* For virtio we always have ch */
+ assert(ch);
+ elem = ch->qi->qe;
+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx];
+
+ /* The 'in' part of the elem is to qemu */
+ unsigned int in_num = elem->in_num;
+ struct iovec *in_sg = elem->in_sg;
+ size_t in_len = iov_size(in_sg, in_num);
+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
+ __func__, elem->index, in_num, in_len);
+
+ /*
+ * The elem should have room for a 'fuse_out_header' (out from fuse)
+ * plus the data based on the len in the header.
+ */
+ if (in_len < sizeof(struct fuse_out_header)) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
+ __func__, elem->index);
+ return -E2BIG;
+ }
+ if (in_len < tosend_len) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n",
+ __func__, elem->index, tosend_len);
+ return -E2BIG;
+ }
+
+ copy_iov(iov, count, in_sg, in_num, tosend_len);
+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len);
+ vu_queue_notify(&se->virtio_dev->dev, q);
+
+ return 0;
+}
+
/* Thread function for individual queues, created when a queue is 'started' */
static void *fv_queue_thread(void *opaque)
{
@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque)
/* TODO! Endianness of header */
- /* TODO: Fixup fuse_send_msg */
/* TODO: Add checks for fuse_session_exited */
fuse_session_process_buf_int(se, &fbuf, &ch);
- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */
- vu_queue_notify(dev, q);
-
+ qi->qe = NULL;
free(elem);
elem = NULL;
}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
index 23026d6..135a148 100644
--- a/tools/virtiofsd/fuse_virtio.h
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se);
int virtio_loop(struct fuse_session *se);
+
+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch,
+ struct iovec *iov, int count);
+
#endif
--
1.8.3.1

View File

@ -0,0 +1,165 @@
From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:00 +0100
Subject: [PATCH 029/116] virtiofsd: Start queue threads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-26-dgilbert@redhat.com>
Patchwork-id: 93479
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Start a thread for each queue when we get notified it's been started.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
fix by:
Signed-off-by: Jun Piao <piaojun@huawei.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 89 insertions(+)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 4819e56..2a94bb3 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -11,6 +11,7 @@
* See the file COPYING.LIB
*/
+#include "qemu/osdep.h"
#include "fuse_virtio.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
@@ -30,6 +31,15 @@
#include "contrib/libvhost-user/libvhost-user.h"
+struct fv_QueueInfo {
+ pthread_t thread;
+ struct fv_VuDev *virtio_dev;
+
+ /* Our queue index, corresponds to array position */
+ int qidx;
+ int kick_fd;
+};
+
/*
* We pass the dev element into libvhost-user
* and then use it to get back to the outer
@@ -38,6 +48,13 @@
struct fv_VuDev {
VuDev dev;
struct fuse_session *se;
+
+ /*
+ * The following pair of fields are only accessed in the main
+ * virtio_loop
+ */
+ size_t nqueues;
+ struct fv_QueueInfo **qi;
};
/* From spec */
@@ -83,6 +100,75 @@ static void fv_panic(VuDev *dev, const char *err)
exit(EXIT_FAILURE);
}
+static void *fv_queue_thread(void *opaque)
+{
+ struct fv_QueueInfo *qi = opaque;
+ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
+ qi->qidx, qi->kick_fd);
+ while (1) {
+ /* TODO */
+ }
+
+ return NULL;
+}
+
+/* Callback from libvhost-user on start or stop of a queue */
+static void fv_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev);
+ struct fv_QueueInfo *ourqi;
+
+ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx,
+ started);
+ assert(qidx >= 0);
+
+ /*
+ * Ignore additional request queues for now. passthrough_ll.c must be
+ * audited for thread-safety issues first. It was written with a
+ * well-behaved client in mind and may not protect against all types of
+ * races yet.
+ */
+ if (qidx > 1) {
+ fuse_log(FUSE_LOG_ERR,
+ "%s: multiple request queues not yet implemented, please only "
+ "configure 1 request queue\n",
+ __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (started) {
+ /* Fire up a thread to watch this queue */
+ if (qidx >= vud->nqueues) {
+ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0]));
+ assert(vud->qi);
+ memset(vud->qi + vud->nqueues, 0,
+ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues)));
+ vud->nqueues = qidx + 1;
+ }
+ if (!vud->qi[qidx]) {
+ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1);
+ assert(vud->qi[qidx]);
+ vud->qi[qidx]->virtio_dev = vud;
+ vud->qi[qidx]->qidx = qidx;
+ } else {
+ /* Shouldn't have been started */
+ assert(vud->qi[qidx]->kick_fd == -1);
+ }
+ ourqi = vud->qi[qidx];
+ ourqi->kick_fd = dev->vq[qidx].kick_fd;
+ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) {
+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n",
+ __func__, qidx);
+ assert(0);
+ }
+ } else {
+ /* TODO: Kill the thread */
+ assert(qidx < vud->nqueues);
+ ourqi = vud->qi[qidx];
+ ourqi->kick_fd = -1;
+ }
+}
+
static bool fv_queue_order(VuDev *dev, int qidx)
{
return false;
@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = {
.get_features = fv_get_features,
.set_features = fv_set_features,
+ /* Don't need process message, we've not got any at vhost-user level */
+ .queue_set_started = fv_queue_set_started,
+
.queue_is_processed_in_order = fv_queue_order,
};
--
1.8.3.1

View File

@ -0,0 +1,200 @@
From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:02 +0100
Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-28-dgilbert@redhat.com>
Patchwork-id: 93484
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Pop queue elements off queues, copy the data from them and
pass that to fuse.
Note: 'out' in a VuVirtqElement is from QEMU
'in' in libfuse is into the daemon
So we read from the out iov's to get a fuse_in_header
When we get a kick we've got to read all the elements until the queue
is empty.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 2 +
tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++--
2 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index ec04449..1126723 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -14,6 +14,7 @@
#include "fuse_lowlevel.h"
struct fv_VuDev;
+struct fv_QueueInfo;
struct fuse_req {
struct fuse_session *se;
@@ -75,6 +76,7 @@ struct fuse_chan {
pthread_mutex_t lock;
int ctr;
int fd;
+ struct fv_QueueInfo *qi;
};
/**
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 05e7258..3841b20 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/iov.h"
#include "fuse_virtio.h"
#include "fuse_i.h"
#include "standard-headers/linux/fuse.h"
@@ -32,6 +33,7 @@
#include "contrib/libvhost-user/libvhost-user.h"
+struct fv_VuDev;
struct fv_QueueInfo {
pthread_t thread;
struct fv_VuDev *virtio_dev;
@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err)
exit(EXIT_FAILURE);
}
+/*
+ * Copy from an iovec into a fuse_buf (memory only)
+ * Caller must ensure there is space
+ */
+static void copy_from_iov(struct fuse_buf *buf, size_t out_num,
+ const struct iovec *out_sg)
+{
+ void *dest = buf->mem;
+
+ while (out_num) {
+ size_t onelen = out_sg->iov_len;
+ memcpy(dest, out_sg->iov_base, onelen);
+ dest += onelen;
+ out_sg++;
+ out_num--;
+ }
+}
+
/* Thread function for individual queues, created when a queue is 'started' */
static void *fv_queue_thread(void *opaque)
{
struct fv_QueueInfo *qi = opaque;
+ struct VuDev *dev = &qi->virtio_dev->dev;
+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
+ struct fuse_session *se = qi->virtio_dev->se;
+ struct fuse_chan ch;
+ struct fuse_buf fbuf;
+
+ fbuf.mem = NULL;
+ fbuf.flags = 0;
+
+ fuse_mutex_init(&ch.lock);
+ ch.fd = (int)0xdaff0d111;
+ ch.qi = qi;
+
fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__,
qi->qidx, qi->kick_fd);
while (1) {
@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque)
fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
break;
}
- if (qi->virtio_dev->se->debug) {
- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__,
- qi->qidx, (size_t)evalue);
+ /* out is from guest, in is too guest */
+ unsigned int in_bytes, out_bytes;
+ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
+
+ fuse_log(FUSE_LOG_DEBUG,
+ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
+ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
+
+ while (1) {
+ /*
+ * An element contains one request and the space to send our
+ * response They're spread over multiple descriptors in a
+ * scatter/gather set and we can't trust the guest to keep them
+ * still; so copy in/out.
+ */
+ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement));
+ if (!elem) {
+ break;
+ }
+
+ if (!fbuf.mem) {
+ fbuf.mem = malloc(se->bufsize);
+ assert(fbuf.mem);
+ assert(se->bufsize > sizeof(struct fuse_in_header));
+ }
+ /* The 'out' part of the elem is from qemu */
+ unsigned int out_num = elem->out_num;
+ struct iovec *out_sg = elem->out_sg;
+ size_t out_len = iov_size(out_sg, out_num);
+ fuse_log(FUSE_LOG_DEBUG,
+ "%s: elem %d: with %d out desc of length %zd\n", __func__,
+ elem->index, out_num, out_len);
+
+ /*
+ * The elem should contain a 'fuse_in_header' (in to fuse)
+ * plus the data based on the len in the header.
+ */
+ if (out_len < sizeof(struct fuse_in_header)) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
+ __func__, elem->index);
+ assert(0); /* TODO */
+ }
+ if (out_len > se->bufsize) {
+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n",
+ __func__, elem->index);
+ assert(0); /* TODO */
+ }
+ copy_from_iov(&fbuf, out_num, out_sg);
+ fbuf.size = out_len;
+
+ /* TODO! Endianness of header */
+
+ /* TODO: Fixup fuse_send_msg */
+ /* TODO: Add checks for fuse_session_exited */
+ fuse_session_process_buf_int(se, &fbuf, &ch);
+
+ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */
+ vu_queue_notify(dev, q);
+
+ free(elem);
+ elem = NULL;
}
}
+ pthread_mutex_destroy(&ch.lock);
+ free(fbuf.mem);
return NULL;
}
--
1.8.3.1

View File

@ -0,0 +1,247 @@
From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:57 +0100
Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-23-dgilbert@redhat.com>
Patchwork-id: 93477
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Listen on our unix socket for the connection from QEMU; when we get it,
initialise vhost-user and dive into our own loop variant (currently a
dummy).
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 4 ++
tools/virtiofsd/fuse_lowlevel.c | 5 +++
tools/virtiofsd/fuse_lowlevel.h | 7 ++++
tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++-
tools/virtiofsd/fuse_virtio.h | 2 +
tools/virtiofsd/passthrough_ll.c | 7 +---
6 files changed, 106 insertions(+), 6 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 82d6ac7..ec04449 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -13,6 +13,8 @@
#include "fuse.h"
#include "fuse_lowlevel.h"
+struct fv_VuDev;
+
struct fuse_req {
struct fuse_session *se;
uint64_t unique;
@@ -65,6 +67,8 @@ struct fuse_session {
size_t bufsize;
int error;
char *vu_socket_path;
+ int vu_socketfd;
+ struct fv_VuDev *virtio_dev;
};
struct fuse_chan {
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 5df124e..af09fa2 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se)
{
}
+int fuse_lowlevel_is_virtio(struct fuse_session *se)
+{
+ return se->vu_socket_path != NULL;
+}
+
#ifdef linux
int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[])
{
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index 2fa225d..f6b3470 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func,
*/
int fuse_req_interrupted(fuse_req_t req);
+/**
+ * Check if the session is connected via virtio
+ *
+ * @param se session object
+ * @return 1 if the session is a virtio session
+ */
+int fuse_lowlevel_is_virtio(struct fuse_session *se);
/*
* Inquiry functions
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index cbef6ff..2ae3c76 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -19,18 +19,78 @@
#include <stdint.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
+#include "contrib/libvhost-user/libvhost-user.h"
+
+/*
+ * We pass the dev element into libvhost-user
+ * and then use it to get back to the outer
+ * container for other data.
+ */
+struct fv_VuDev {
+ VuDev dev;
+ struct fuse_session *se;
+};
+
/* From spec */
struct virtio_fs_config {
char tag[36];
uint32_t num_queues;
};
+/*
+ * Callback from libvhost-user if there's a new fd we're supposed to listen
+ * to, typically a queue kick?
+ */
+static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb,
+ void *data)
+{
+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
+}
+
+/*
+ * Callback from libvhost-user if we're no longer supposed to listen on an fd
+ */
+static void fv_remove_watch(VuDev *dev, int fd)
+{
+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd);
+}
+
+/* Callback from libvhost-user to panic */
+static void fv_panic(VuDev *dev, const char *err)
+{
+ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err);
+ /* TODO: Allow reconnects?? */
+ exit(EXIT_FAILURE);
+}
+
+static bool fv_queue_order(VuDev *dev, int qidx)
+{
+ return false;
+}
+
+static const VuDevIface fv_iface = {
+ /* TODO: Add other callbacks */
+ .queue_is_processed_in_order = fv_queue_order,
+};
+
+int virtio_loop(struct fuse_session *se)
+{
+ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__);
+
+ while (1) {
+ /* TODO: Add stuffing */
+ }
+
+ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__);
+}
+
int virtio_session_mount(struct fuse_session *se)
{
struct sockaddr_un un;
@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se)
return -1;
}
- return -1;
+ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
+ __func__);
+ int data_sock = accept(listen_sock, NULL, NULL);
+ if (data_sock == -1) {
+ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
+ close(listen_sock);
+ return -1;
+ }
+ close(listen_sock);
+ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
+ __func__);
+
+ /* TODO: Some cleanup/deallocation! */
+ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1);
+ if (!se->virtio_dev) {
+ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__);
+ close(data_sock);
+ return -1;
+ }
+
+ se->vu_socketfd = data_sock;
+ se->virtio_dev->se = se;
+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
+ fv_remove_watch, &fv_iface);
+
+ return 0;
}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
index 8f2edb6..23026d6 100644
--- a/tools/virtiofsd/fuse_virtio.h
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -20,4 +20,6 @@ struct fuse_session;
int virtio_session_mount(struct fuse_session *se);
+int virtio_loop(struct fuse_session *se);
+
#endif
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index fc9b264..037c5d7 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -36,6 +36,7 @@
*/
#include "qemu/osdep.h"
+#include "fuse_virtio.h"
#include "fuse_lowlevel.h"
#include <assert.h>
#include <dirent.h>
@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[])
fuse_daemonize(opts.foreground);
/* Block until ctrl+c or fusermount -u */
- if (opts.singlethread) {
- ret = fuse_session_loop(se);
- } else {
- ret = fuse_session_loop_mt(se, opts.clone_fd);
- }
+ ret = virtio_loop(se);
fuse_session_unmount(se);
err_out3:
--
1.8.3.1

View File

@ -0,0 +1,355 @@
From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:03 +0100
Subject: [PATCH 092/116] virtiofsd: Support remote posix locks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-89-dgilbert@redhat.com>
Patchwork-id: 93537
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
Doing posix locks within the guest kernel is not sufficient if a file/dir
is being shared by multiple guests. So we need the notion of the daemon doing
the locks, which are visible to the rest of the guests.
Given posix locks are per process, one cannot call the posix lock API on the host,
otherwise a bunch of basic posix lock properties are broken. For example,
if two processes (A and B) in the guest open the file and take locks on different
sections of the file, and one of the processes closes the fd, it will close the
fd on virtiofsd and all posix locks on the file will go away. This means if
process A closes the fd, then the locks of process B will go away too.
Other similar problems exist too.
This patch set tries to emulate posix locks while using open file
description locks provided on Linux.
Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable
or disable posix locking in daemon. By default it is enabled.
There are a few issues though.
- GETLK() returns the pid of the process holding the lock. As we are emulating
locks using OFD locks, which are not per process and don't return the pid
of a process, GETLK() in the guest does not return the process pid.
- As of now only F_SETLK is supported and not F_SETLKW. We can't block
the thread in virtiofsd for an arbitrarily long duration as there is only
one thread serving the queue. That means the unlock request will not make
it to the daemon and F_SETLKW will block indefinitely and bring virtio-fs
to a halt. This is a solvable problem though, and will require significant
changes in virtiofsd and the kernel. Left as a TODO item for now.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 3 +
tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++
2 files changed, 192 insertions(+)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 5672024..33749bf 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -156,6 +156,9 @@ void fuse_cmdline_help(void)
" allowed (default: 10)\n"
" -o norace disable racy fallback\n"
" default: false\n"
+ " -o posix_lock|no_posix_lock\n"
+ " enable/disable remote posix lock\n"
+ " default: posix_lock\n"
" -o readdirplus|no_readdirplus\n"
" enable/disable readirplus\n"
" default: readdirplus except with "
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 05b5f89..9414935 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -67,6 +67,12 @@
#include "passthrough_helpers.h"
#include "seccomp.h"
+/* Keep track of inode posix locks for each owner. */
+struct lo_inode_plock {
+ uint64_t lock_owner;
+ int fd; /* fd for OFD locks */
+};
+
struct lo_map_elem {
union {
struct lo_inode *inode;
@@ -95,6 +101,8 @@ struct lo_inode {
struct lo_key key;
uint64_t refcount; /* protected by lo->mutex */
fuse_ino_t fuse_ino;
+ pthread_mutex_t plock_mutex;
+ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
};
struct lo_cred {
@@ -114,6 +122,7 @@ struct lo_data {
int norace;
int writeback;
int flock;
+ int posix_lock;
int xattr;
char *source;
double timeout;
@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = {
{ "source=%s", offsetof(struct lo_data, source), 0 },
{ "flock", offsetof(struct lo_data, flock), 1 },
{ "no_flock", offsetof(struct lo_data, flock), 0 },
+ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
+ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
{ "xattr", offsetof(struct lo_data, xattr), 1 },
{ "no_xattr", offsetof(struct lo_data, xattr), 0 },
{ "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
conn->want |= FUSE_CAP_FLOCK_LOCKS;
}
+
+ if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
+ if (lo->posix_lock) {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
+ conn->want |= FUSE_CAP_POSIX_LOCKS;
+ } else {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
+ conn->want &= ~FUSE_CAP_POSIX_LOCKS;
+ }
+ }
+
if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
lo->readdirplus_clear) {
fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
return p;
}
+/* value_destroy_func for posix_locks GHashTable */
+static void posix_locks_value_destroy(gpointer data)
+{
+ struct lo_inode_plock *plock = data;
+
+ /*
+ * We had used open() for locks and had only one fd. So
+ * closing this fd should release all OFD locks.
+ */
+ close(plock->fd);
+ free(plock);
+}
+
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
struct fuse_entry_param *e)
{
@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
newfd = -1;
inode->key.ino = e->attr.st_ino;
inode->key.dev = e->attr.st_dev;
+ pthread_mutex_init(&inode->plock_mutex, NULL);
+ inode->posix_locks = g_hash_table_new_full(
+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
pthread_mutex_lock(&lo->mutex);
inode->fuse_ino = lo_add_inode_mapping(req, inode);
@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
if (!inode->refcount) {
lo_map_remove(&lo->ino_map, inode->fuse_ino);
g_hash_table_remove(lo->inodes, &inode->key);
+ if (g_hash_table_size(inode->posix_locks)) {
+ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n");
+ }
+ g_hash_table_destroy(inode->posix_locks);
+ pthread_mutex_destroy(&inode->plock_mutex);
pthread_mutex_unlock(&lo->mutex);
close(inode->fd);
free(inode);
@@ -1516,6 +1559,136 @@ out:
}
}
+/* Should be called with inode->plock_mutex held */
+static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo,
+ struct lo_inode *inode,
+ uint64_t lock_owner,
+ pid_t pid, int *err)
+{
+ struct lo_inode_plock *plock;
+ char procname[64];
+ int fd;
+
+ plock =
+ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner));
+
+ if (plock) {
+ return plock;
+ }
+
+ plock = malloc(sizeof(struct lo_inode_plock));
+ if (!plock) {
+ *err = ENOMEM;
+ return NULL;
+ }
+
+ /* Open another instance of file which can be used for ofd locks. */
+ sprintf(procname, "%i", inode->fd);
+
+ /* TODO: What if file is not writable? */
+ fd = openat(lo->proc_self_fd, procname, O_RDWR);
+ if (fd == -1) {
+ *err = errno;
+ free(plock);
+ return NULL;
+ }
+
+ plock->lock_owner = lock_owner;
+ plock->fd = fd;
+ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner),
+ plock);
+ return plock;
+}
+
+static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+ struct flock *lock)
+{
+ struct lo_data *lo = lo_data(req);
+ struct lo_inode *inode;
+ struct lo_inode_plock *plock;
+ int ret, saverr = 0;
+
+ fuse_log(FUSE_LOG_DEBUG,
+ "lo_getlk(ino=%" PRIu64 ", flags=%d)"
+ " owner=0x%lx, l_type=%d l_start=0x%lx"
+ " l_len=0x%lx\n",
+ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start,
+ lock->l_len);
+
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ pthread_mutex_lock(&inode->plock_mutex);
+ plock =
+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
+ if (!plock) {
+ pthread_mutex_unlock(&inode->plock_mutex);
+ fuse_reply_err(req, ret);
+ return;
+ }
+
+ ret = fcntl(plock->fd, F_OFD_GETLK, lock);
+ if (ret == -1) {
+ saverr = errno;
+ }
+ pthread_mutex_unlock(&inode->plock_mutex);
+
+ if (saverr) {
+ fuse_reply_err(req, saverr);
+ } else {
+ fuse_reply_lock(req, lock);
+ }
+}
+
+static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
+ struct flock *lock, int sleep)
+{
+ struct lo_data *lo = lo_data(req);
+ struct lo_inode *inode;
+ struct lo_inode_plock *plock;
+ int ret, saverr = 0;
+
+ fuse_log(FUSE_LOG_DEBUG,
+ "lo_setlk(ino=%" PRIu64 ", flags=%d)"
+ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d"
+ " l_start=0x%lx l_len=0x%lx\n",
+ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep,
+ lock->l_whence, lock->l_start, lock->l_len);
+
+ if (sleep) {
+ fuse_reply_err(req, EOPNOTSUPP);
+ return;
+ }
+
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ pthread_mutex_lock(&inode->plock_mutex);
+ plock =
+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
+
+ if (!plock) {
+ pthread_mutex_unlock(&inode->plock_mutex);
+ fuse_reply_err(req, ret);
+ return;
+ }
+
+ /* TODO: Is it alright to modify flock? */
+ lock->l_pid = 0;
+ ret = fcntl(plock->fd, F_OFD_SETLK, lock);
+ if (ret == -1) {
+ saverr = errno;
+ }
+ pthread_mutex_unlock(&inode->plock_mutex);
+ fuse_reply_err(req, saverr);
+}
+
static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
struct fuse_file_info *fi)
{
@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
int res;
(void)ino;
+ struct lo_inode *inode;
+
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ /* An fd is going away. Cleanup associated posix locks */
+ pthread_mutex_lock(&inode->plock_mutex);
+ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner));
+ pthread_mutex_unlock(&inode->plock_mutex);
+
res = close(dup(lo_fi_fd(req, fi)));
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = {
.releasedir = lo_releasedir,
.fsyncdir = lo_fsyncdir,
.create = lo_create,
+ .getlk = lo_getlk,
+ .setlk = lo_setlk,
.open = lo_open,
.release = lo_release,
.flush = lo_flush,
@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[])
struct lo_data lo = {
.debug = 0,
.writeback = 0,
+ .posix_lock = 1,
.proc_self_fd = -1,
};
struct lo_map_elem *root_elem;
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,545 @@
From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:51 +0100
Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-17-dgilbert@redhat.com>
Patchwork-id: 93468
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
virtiofsd only supports major=7, minor>=31; trim out a lot of
old compatibility code.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++-------------------------
1 file changed, 119 insertions(+), 211 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 07fb8a6..514d79c 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f)
int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e)
{
struct fuse_entry_out arg;
- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE :
- sizeof(arg);
-
- /*
- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant
- * negative entry
- */
- if (!e->ino && req->se->conn.proto_minor < 4) {
- return fuse_reply_err(req, ENOENT);
- }
+ size_t size = sizeof(arg);
memset(&arg, 0, sizeof(arg));
fill_entry(&arg, e);
@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e,
const struct fuse_file_info *f)
{
char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)];
- size_t entrysize = req->se->conn.proto_minor < 9 ?
- FUSE_COMPAT_ENTRY_OUT_SIZE :
- sizeof(struct fuse_entry_out);
+ size_t entrysize = sizeof(struct fuse_entry_out);
struct fuse_entry_out *earg = (struct fuse_entry_out *)buf;
struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize);
@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr,
double attr_timeout)
{
struct fuse_attr_out arg;
- size_t size =
- req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg);
+ size_t size = sizeof(arg);
memset(&arg, 0, sizeof(arg));
arg.attr_valid = calc_timeout_sec(attr_timeout);
@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf)
{
struct fuse_statfs_out arg;
- size_t size =
- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg);
+ size_t size = sizeof(arg);
memset(&arg, 0, sizeof(arg));
convert_statfs(stbuf, &arg.st);
@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov,
iov[count].iov_len = sizeof(arg);
count++;
- if (req->se->conn.proto_minor < 16) {
- if (in_count) {
- iov[count].iov_base = (void *)in_iov;
- iov[count].iov_len = sizeof(in_iov[0]) * in_count;
- count++;
- }
+ /* Can't handle non-compat 64bit ioctls on 32bit */
+ if (sizeof(void *) == 4 && req->ioctl_64bit) {
+ res = fuse_reply_err(req, EINVAL);
+ goto out;
+ }
- if (out_count) {
- iov[count].iov_base = (void *)out_iov;
- iov[count].iov_len = sizeof(out_iov[0]) * out_count;
- count++;
+ if (in_count) {
+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count);
+ if (!in_fiov) {
+ goto enomem;
}
- } else {
- /* Can't handle non-compat 64bit ioctls on 32bit */
- if (sizeof(void *) == 4 && req->ioctl_64bit) {
- res = fuse_reply_err(req, EINVAL);
- goto out;
- }
-
- if (in_count) {
- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count);
- if (!in_fiov) {
- goto enomem;
- }
- iov[count].iov_base = (void *)in_fiov;
- iov[count].iov_len = sizeof(in_fiov[0]) * in_count;
- count++;
+ iov[count].iov_base = (void *)in_fiov;
+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count;
+ count++;
+ }
+ if (out_count) {
+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count);
+ if (!out_fiov) {
+ goto enomem;
}
- if (out_count) {
- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count);
- if (!out_fiov) {
- goto enomem;
- }
- iov[count].iov_base = (void *)out_fiov;
- iov[count].iov_len = sizeof(out_fiov[0]) * out_count;
- count++;
- }
+ iov[count].iov_base = (void *)out_fiov;
+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count;
+ count++;
}
res = send_reply_iov(req, 0, iov, count);
@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
struct fuse_file_info *fip = NULL;
struct fuse_file_info fi;
- if (req->se->conn.proto_minor >= 9) {
- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg;
+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg;
- if (arg->getattr_flags & FUSE_GETATTR_FH) {
- memset(&fi, 0, sizeof(fi));
- fi.fh = arg->fh;
- fip = &fi;
- }
+ if (arg->getattr_flags & FUSE_GETATTR_FH) {
+ memset(&fi, 0, sizeof(fi));
+ fi.fh = arg->fh;
+ fip = &fi;
}
if (req->se->op.getattr) {
@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg;
char *name = PARAM(arg);
- if (req->se->conn.proto_minor >= 12) {
- req->ctx.umask = arg->umask;
- } else {
- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE;
- }
+ req->ctx.umask = arg->umask;
if (req->se->op.mknod) {
req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev);
@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
{
struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg;
- if (req->se->conn.proto_minor >= 12) {
- req->ctx.umask = arg->umask;
- }
+ req->ctx.umask = arg->umask;
if (req->se->op.mkdir) {
req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode);
@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags;
- if (req->se->conn.proto_minor >= 12) {
- req->ctx.umask = arg->umask;
- } else {
- name = (char *)inarg + sizeof(struct fuse_open_in);
- }
+ req->ctx.umask = arg->umask;
req->se->op.create(req, nodeid, name, arg->mode, &fi);
} else {
@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh;
- if (req->se->conn.proto_minor >= 9) {
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- }
+ fi.lock_owner = arg->lock_owner;
+ fi.flags = arg->flags;
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi);
} else {
fuse_reply_err(req, ENOSYS);
@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
fi.fh = arg->fh;
fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
- if (req->se->conn.proto_minor < 9) {
- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE;
- } else {
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- param = PARAM(arg);
- }
+ fi.lock_owner = arg->lock_owner;
+ fi.flags = arg->flags;
+ param = PARAM(arg);
if (req->se->op.write) {
req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi);
@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg,
fi.fh = arg->fh;
fi.writepage = arg->write_flags & FUSE_WRITE_CACHE;
- if (se->conn.proto_minor < 9) {
- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE;
- bufv.buf[0].size -=
- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE;
- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD));
- } else {
- fi.lock_owner = arg->lock_owner;
- fi.flags = arg->flags;
- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) {
- bufv.buf[0].mem = PARAM(arg);
- }
-
- bufv.buf[0].size -=
- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in);
+ fi.lock_owner = arg->lock_owner;
+ fi.flags = arg->flags;
+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) {
+ bufv.buf[0].mem = PARAM(arg);
}
+
+ bufv.buf[0].size -=
+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in);
if (bufv.buf[0].size < arg->size) {
fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n");
fuse_reply_err(req, EIO);
@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh;
fi.flush = 1;
- if (req->se->conn.proto_minor >= 7) {
- fi.lock_owner = arg->lock_owner;
- }
+ fi.lock_owner = arg->lock_owner;
if (req->se->op.flush) {
req->se->op.flush(req, nodeid, &fi);
@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags;
fi.fh = arg->fh;
- if (req->se->conn.proto_minor >= 8) {
- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
- fi.lock_owner = arg->lock_owner;
- }
+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
+ fi.lock_owner = arg->lock_owner;
if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) {
fi.flock_release = 1;
fi.lock_owner = arg->lock_owner;
@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
memset(&fi, 0, sizeof(fi));
fi.fh = arg->fh;
- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 &&
- !(flags & FUSE_IOCTL_32BIT)) {
+ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) {
req->ioctl_64bit = 1;
}
@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
outarg.major = FUSE_KERNEL_VERSION;
outarg.minor = FUSE_KERNEL_MINOR_VERSION;
- if (arg->major < 7) {
+ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) {
fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n",
arg->major, arg->minor);
fuse_reply_err(req, EPROTO);
@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
return;
}
- if (arg->minor >= 6) {
- if (arg->max_readahead < se->conn.max_readahead) {
- se->conn.max_readahead = arg->max_readahead;
- }
- if (arg->flags & FUSE_ASYNC_READ) {
- se->conn.capable |= FUSE_CAP_ASYNC_READ;
- }
- if (arg->flags & FUSE_POSIX_LOCKS) {
- se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
- }
- if (arg->flags & FUSE_ATOMIC_O_TRUNC) {
- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
- }
- if (arg->flags & FUSE_EXPORT_SUPPORT) {
- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
- }
- if (arg->flags & FUSE_DONT_MASK) {
- se->conn.capable |= FUSE_CAP_DONT_MASK;
- }
- if (arg->flags & FUSE_FLOCK_LOCKS) {
- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
- }
- if (arg->flags & FUSE_AUTO_INVAL_DATA) {
- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
- }
- if (arg->flags & FUSE_DO_READDIRPLUS) {
- se->conn.capable |= FUSE_CAP_READDIRPLUS;
- }
- if (arg->flags & FUSE_READDIRPLUS_AUTO) {
- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
- }
- if (arg->flags & FUSE_ASYNC_DIO) {
- se->conn.capable |= FUSE_CAP_ASYNC_DIO;
- }
- if (arg->flags & FUSE_WRITEBACK_CACHE) {
- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
- }
- if (arg->flags & FUSE_NO_OPEN_SUPPORT) {
- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
- }
- if (arg->flags & FUSE_PARALLEL_DIROPS) {
- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
- }
- if (arg->flags & FUSE_POSIX_ACL) {
- se->conn.capable |= FUSE_CAP_POSIX_ACL;
- }
- if (arg->flags & FUSE_HANDLE_KILLPRIV) {
- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
- }
- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) {
- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
- }
- if (!(arg->flags & FUSE_MAX_PAGES)) {
- size_t max_bufsize =
- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
- FUSE_BUFFER_HEADER_SIZE;
- if (bufsize > max_bufsize) {
- bufsize = max_bufsize;
- }
+ if (arg->max_readahead < se->conn.max_readahead) {
+ se->conn.max_readahead = arg->max_readahead;
+ }
+ if (arg->flags & FUSE_ASYNC_READ) {
+ se->conn.capable |= FUSE_CAP_ASYNC_READ;
+ }
+ if (arg->flags & FUSE_POSIX_LOCKS) {
+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS;
+ }
+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) {
+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
+ }
+ if (arg->flags & FUSE_EXPORT_SUPPORT) {
+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
+ }
+ if (arg->flags & FUSE_DONT_MASK) {
+ se->conn.capable |= FUSE_CAP_DONT_MASK;
+ }
+ if (arg->flags & FUSE_FLOCK_LOCKS) {
+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
+ }
+ if (arg->flags & FUSE_AUTO_INVAL_DATA) {
+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA;
+ }
+ if (arg->flags & FUSE_DO_READDIRPLUS) {
+ se->conn.capable |= FUSE_CAP_READDIRPLUS;
+ }
+ if (arg->flags & FUSE_READDIRPLUS_AUTO) {
+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO;
+ }
+ if (arg->flags & FUSE_ASYNC_DIO) {
+ se->conn.capable |= FUSE_CAP_ASYNC_DIO;
+ }
+ if (arg->flags & FUSE_WRITEBACK_CACHE) {
+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
+ }
+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) {
+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT;
+ }
+ if (arg->flags & FUSE_PARALLEL_DIROPS) {
+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
+ }
+ if (arg->flags & FUSE_POSIX_ACL) {
+ se->conn.capable |= FUSE_CAP_POSIX_ACL;
+ }
+ if (arg->flags & FUSE_HANDLE_KILLPRIV) {
+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV;
+ }
+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) {
+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT;
+ }
+ if (!(arg->flags & FUSE_MAX_PAGES)) {
+ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +
+ FUSE_BUFFER_HEADER_SIZE;
+ if (bufsize > max_bufsize) {
+ bufsize = max_bufsize;
}
- } else {
- se->conn.max_readahead = 0;
}
-
- if (se->conn.proto_minor >= 14) {
#ifdef HAVE_SPLICE
#ifdef HAVE_VMSPLICE
- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
#endif
- se->conn.capable |= FUSE_CAP_SPLICE_READ;
+ se->conn.capable |= FUSE_CAP_SPLICE_READ;
#endif
- }
- if (se->conn.proto_minor >= 18) {
- se->conn.capable |= FUSE_CAP_IOCTL_DIR;
- }
+ se->conn.capable |= FUSE_CAP_IOCTL_DIR;
/*
* Default settings for modern filesystems.
@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
}
outarg.max_readahead = se->conn.max_readahead;
outarg.max_write = se->conn.max_write;
- if (se->conn.proto_minor >= 13) {
- if (se->conn.max_background >= (1 << 16)) {
- se->conn.max_background = (1 << 16) - 1;
- }
- if (se->conn.congestion_threshold > se->conn.max_background) {
- se->conn.congestion_threshold = se->conn.max_background;
- }
- if (!se->conn.congestion_threshold) {
- se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
- }
-
- outarg.max_background = se->conn.max_background;
- outarg.congestion_threshold = se->conn.congestion_threshold;
+ if (se->conn.max_background >= (1 << 16)) {
+ se->conn.max_background = (1 << 16) - 1;
+ }
+ if (se->conn.congestion_threshold > se->conn.max_background) {
+ se->conn.congestion_threshold = se->conn.max_background;
}
- if (se->conn.proto_minor >= 23) {
- outarg.time_gran = se->conn.time_gran;
+ if (!se->conn.congestion_threshold) {
+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4;
}
+ outarg.max_background = se->conn.max_background;
+ outarg.congestion_threshold = se->conn.congestion_threshold;
+ outarg.time_gran = se->conn.time_gran;
+
if (se->debug) {
fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major,
outarg.minor);
@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
outarg.congestion_threshold);
fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran);
}
- if (arg->minor < 5) {
- outargsize = FUSE_COMPAT_INIT_OUT_SIZE;
- } else if (arg->minor < 23) {
- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE;
- }
send_reply_ok(req, &outarg, outargsize);
}
@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
return -EINVAL;
}
- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) {
- return -ENOSYS;
- }
-
outarg.ino = ino;
outarg.off = off;
outarg.len = len;
@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
return -EINVAL;
}
- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) {
- return -ENOSYS;
- }
-
outarg.parent = parent;
outarg.namelen = namelen;
outarg.padding = 0;
@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
return -EINVAL;
}
- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) {
- return -ENOSYS;
- }
-
outarg.parent = parent;
outarg.child = child;
outarg.namelen = namelen;
@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
return -EINVAL;
}
- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) {
- return -ENOSYS;
- }
-
out.unique = 0;
out.error = FUSE_NOTIFY_STORE;
--
1.8.3.1

View File

@ -0,0 +1,93 @@
From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:17 +0100
Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-103-dgilbert@redhat.com>
Patchwork-id: 93557
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: piaojun <piaojun@huawei.com>
Define fuse_buf_writev(), which uses pwritev and writev to improve io
bandwidth. In particular, the src bufs with 0 size should be skipped, as
their mems are not *block_size* aligned, which would cause writev to fail
in direct io mode.
Signed-off-by: Jun Piao <piaojun@huawei.com>
Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 42a608f..37befeb 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -14,6 +14,7 @@
#include "fuse_lowlevel.h"
#include <assert.h>
#include <errno.h>
+#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv)
return size;
}
+__attribute__((unused))
+static ssize_t fuse_buf_writev(struct fuse_buf *out_buf,
+ struct fuse_bufvec *in_buf)
+{
+ ssize_t res, i, j;
+ size_t iovcnt = in_buf->count;
+ struct iovec *iov;
+ int fd = out_buf->fd;
+
+ iov = calloc(iovcnt, sizeof(struct iovec));
+ if (!iov) {
+ return -ENOMEM;
+ }
+
+ for (i = 0, j = 0; i < iovcnt; i++) {
+ /* Skip the buf with 0 size */
+ if (in_buf->buf[i].size) {
+ iov[j].iov_base = in_buf->buf[i].mem;
+ iov[j].iov_len = in_buf->buf[i].size;
+ j++;
+ }
+ }
+
+ if (out_buf->flags & FUSE_BUF_FD_SEEK) {
+ res = pwritev(fd, iov, iovcnt, out_buf->pos);
+ } else {
+ res = writev(fd, iov, iovcnt);
+ }
+
+ if (res == -1) {
+ res = -errno;
+ }
+
+ free(iov);
+ return res;
+}
+
static size_t min_size(size_t s1, size_t s2)
{
return s1 < s2 ? s1 : s2;
--
1.8.3.1

View File

@ -0,0 +1,170 @@
From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:07 +0100
Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-33-dgilbert@redhat.com>
Patchwork-id: 93487
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Although --socket-path=PATH is useful for manual invocations, management
tools typically create the UNIX domain socket themselves and pass it to
the vhost-user device backend. This way QEMU can be launched
immediately with a valid socket. No waiting for the vhost-user device
backend is required when fd passing is used.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 1 +
tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++----
tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 1126723..45995f3 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -68,6 +68,7 @@ struct fuse_session {
size_t bufsize;
int error;
char *vu_socket_path;
+ int vu_listen_fd;
int vu_socketfd;
struct fv_VuDev *virtio_dev;
};
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 4f4684d..95f4db8 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = {
LL_OPTION("--debug", debug, 1),
LL_OPTION("allow_root", deny_others, 1),
LL_OPTION("--socket-path=%s", vu_socket_path, 0),
+ LL_OPTION("--fd=%d", vu_listen_fd, 0),
FUSE_OPT_END
};
@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void)
*/
printf(
" -o allow_root allow access by root\n"
- " --socket-path=PATH path for the vhost-user socket\n");
+ " --socket-path=PATH path for the vhost-user socket\n"
+ " --fd=FDNUM fd number of vhost-user socket\n");
}
void fuse_session_destroy(struct fuse_session *se)
@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
goto out1;
}
se->fd = -1;
+ se->vu_listen_fd = -1;
se->conn.max_write = UINT_MAX;
se->conn.max_readahead = UINT_MAX;
@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
goto out4;
}
- if (!se->vu_socket_path) {
- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n");
+ if (!se->vu_socket_path && se->vu_listen_fd < 0) {
+ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd option\n");
+ goto out4;
+ }
+ if (se->vu_socket_path && se->vu_listen_fd >= 0) {
+ fuse_log(FUSE_LOG_ERR,
+ "fuse: --socket-path and --fd cannot be given together\n");
goto out4;
}
@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se)
int fuse_lowlevel_is_virtio(struct fuse_session *se)
{
- return se->vu_socket_path != NULL;
+ return !!se->virtio_dev;
}
#ifdef linux
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 7e2711b..635f877 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se)
return 0;
}
-int virtio_session_mount(struct fuse_session *se)
+static int fv_create_listen_socket(struct fuse_session *se)
{
struct sockaddr_un un;
mode_t old_umask;
+ /* Nothing to do if fd is already initialized */
+ if (se->vu_listen_fd >= 0) {
+ return 0;
+ }
+
if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) {
fuse_log(FUSE_LOG_ERR, "Socket path too long\n");
return -1;
}
- se->fd = -1;
-
/*
* Create the Unix socket to communicate with qemu
* based on QEMU's vhost-user-bridge
@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se)
return -1;
}
+ se->vu_listen_fd = listen_sock;
+ return 0;
+}
+
+int virtio_session_mount(struct fuse_session *se)
+{
+ int ret;
+
+ ret = fv_create_listen_socket(se);
+ if (ret < 0) {
+ return ret;
+ }
+
+ se->fd = -1;
+
fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n",
__func__);
- int data_sock = accept(listen_sock, NULL, NULL);
+ int data_sock = accept(se->vu_listen_fd, NULL, NULL);
if (data_sock == -1) {
fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n");
- close(listen_sock);
+ close(se->vu_listen_fd);
return -1;
}
- close(listen_sock);
+ close(se->vu_listen_fd);
+ se->vu_listen_fd = -1;
fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n",
__func__);
--
1.8.3.1

View File

@ -0,0 +1,134 @@
From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:21 +0100
Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-47-dgilbert@redhat.com>
Patchwork-id: 93502
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Introduce an API for consuming bytes from a buffer with size checks.
All FUSE operations will be converted to use this safe API instead of
void *inarg.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++
tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 772efa9..42a608f 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
return copied;
}
+
+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len)
+{
+ void *ptr;
+
+ if (len > iter->size - iter->pos) {
+ return NULL;
+ }
+
+ ptr = iter->mem + iter->pos;
+ iter->pos += len;
+ return ptr;
+}
+
+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter)
+{
+ const char *str = iter->mem + iter->pos;
+ size_t remaining = iter->size - iter->pos;
+ size_t i;
+
+ for (i = 0; i < remaining; i++) {
+ if (str[i] == '\0') {
+ iter->pos += i + 1;
+ return str;
+ }
+ }
+ return NULL;
+}
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index 0cb33ac..f8f6433 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv);
*/
ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src);
+/**
+ * Memory buffer iterator
+ *
+ */
+struct fuse_mbuf_iter {
+ /**
+ * Data pointer
+ */
+ void *mem;
+
+ /**
+ * Total length, in bytes
+ */
+ size_t size;
+
+ /**
+ * Offset from start of buffer
+ */
+ size_t pos;
+};
+
+/* Initialize memory buffer iterator from a fuse_buf */
+#define FUSE_MBUF_ITER_INIT(fbuf) \
+ ((struct fuse_mbuf_iter){ \
+ .mem = fbuf->mem, \
+ .size = fbuf->size, \
+ .pos = 0, \
+ })
+
+/**
+ * Consume bytes from a memory buffer iterator
+ *
+ * @param iter memory buffer iterator
+ * @param len number of bytes to consume
+ * @return pointer to start of consumed bytes or
+ * NULL if advancing beyond end of buffer
+ */
+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len);
+
+/**
+ * Consume a NUL-terminated string from a memory buffer iterator
+ *
+ * @param iter memory buffer iterator
+ * @return pointer to the string or
+ * NULL if advancing beyond end of buffer or there is no NUL-terminator
+ */
+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter);
+
/*
* Signal handling
*/
-
/**
* Exit session on HUP, TERM and INT signals and ignore PIPE signal
*
--
1.8.3.1

View File

@ -0,0 +1,88 @@
From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:58 +0100
Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-84-dgilbert@redhat.com>
Patchwork-id: 93538
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
This adds a helper function for lo_data's cleanup.
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 056ebe8..e8dc5c7 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b)
return la->ino == lb->ino && la->dev == lb->dev;
}
+static void fuse_lo_data_cleanup(struct lo_data *lo)
+{
+ if (lo->inodes) {
+ g_hash_table_destroy(lo->inodes);
+ }
+ lo_map_destroy(&lo->fd_map);
+ lo_map_destroy(&lo->dirp_map);
+ lo_map_destroy(&lo->ino_map);
+
+ if (lo->proc_self_fd >= 0) {
+ close(lo->proc_self_fd);
+ }
+
+ if (lo->root.fd >= 0) {
+ close(lo->root.fd);
+ }
+
+ free(lo->source);
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
@@ -2554,22 +2574,7 @@ err_out2:
err_out1:
fuse_opt_free_args(&args);
- if (lo.inodes) {
- g_hash_table_destroy(lo.inodes);
- }
- lo_map_destroy(&lo.fd_map);
- lo_map_destroy(&lo.dirp_map);
- lo_map_destroy(&lo.ino_map);
-
- if (lo.proc_self_fd >= 0) {
- close(lo.proc_self_fd);
- }
-
- if (lo.root.fd >= 0) {
- close(lo.root.fd);
- }
-
- free(lo.source);
+ fuse_lo_data_cleanup(&lo);
return ret ? 1 : 0;
}
--
1.8.3.1

View File

@ -0,0 +1,46 @@
From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:55 +0100
Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-21-dgilbert@redhat.com>
Patchwork-id: 93474
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
The -o source=PATH option will be used by most command-line invocations.
Let's document it!
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 26ac870..fc9b264 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[])
if (opts.show_help) {
printf("usage: %s [options]\n\n", argv[0]);
fuse_cmdline_help();
+ printf(" -o source=PATH shared directory tree\n");
fuse_lowlevel_help();
ret = 0;
goto err_out1;
--
1.8.3.1

View File

@ -0,0 +1,121 @@
From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:10 +0100
Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-36-dgilbert@redhat.com>
Patchwork-id: 93486
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Add the --print-capabilities option as per vhost-user.rst "Backend
programs conventions". Currently there are no advertised features.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
docs/interop/vhost-user.json | 4 +++-
tools/virtiofsd/fuse_lowlevel.h | 1 +
tools/virtiofsd/helper.c | 2 ++
tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++
4 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json
index da6aaf5..d4ea1f7 100644
--- a/docs/interop/vhost-user.json
+++ b/docs/interop/vhost-user.json
@@ -31,6 +31,7 @@
# @rproc-serial: virtio remoteproc serial link
# @scsi: virtio scsi
# @vsock: virtio vsock transport
+# @fs: virtio fs (since 4.2)
#
# Since: 4.0
##
@@ -50,7 +51,8 @@
'rpmsg',
'rproc-serial',
'scsi',
- 'vsock'
+ 'vsock',
+ 'fs'
]
}
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index f6b3470..0d61df8 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts {
int nodefault_subtype;
int show_version;
int show_help;
+ int print_capabilities;
unsigned int max_idle_threads;
};
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index a3645fc..b8ec5ac 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = {
FUSE_HELPER_OPT("--help", show_help),
FUSE_HELPER_OPT("-V", show_version),
FUSE_HELPER_OPT("--version", show_version),
+ FUSE_HELPER_OPT("--print-capabilities", print_capabilities),
FUSE_HELPER_OPT("-d", debug),
FUSE_HELPER_OPT("debug", debug),
FUSE_HELPER_OPT("-d", foreground),
@@ -135,6 +136,7 @@ void fuse_cmdline_help(void)
{
printf(" -h --help print help\n"
" -V --version print version\n"
+ " --print-capabilities print vhost-user.json\n"
" -d -o debug enable debug output (implies -f)\n"
" -f foreground operation\n"
" --daemonize run in background\n"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 037c5d7..cd27c09 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = {
.lseek = lo_lseek,
};
+/* Print vhost-user.json backend program capabilities */
+static void print_capabilities(void)
+{
+ printf("{\n");
+ printf(" \"type\": \"fs\"\n");
+ printf("}\n");
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[])
fuse_lowlevel_version();
ret = 0;
goto err_out1;
+ } else if (opts.print_capabilities) {
+ print_capabilities();
+ ret = 0;
+ goto err_out1;
}
if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
--
1.8.3.1

View File

@ -0,0 +1,285 @@
From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:30 +0100
Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-56-dgilbert@redhat.com>
Patchwork-id: 93511
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Only allow system calls that are needed by virtiofsd. All other system
calls cause SIGSYS to be directed at the thread and the process will
coredump.
Restricting system calls reduces the kernel attack surface and limits
what the process can do when compromised.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
with additional entries by:
Signed-off-by: Ganesh Maharaj Mahalingam <ganesh.mahalingam@intel.com>
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: piaojun <piaojun@huawei.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
Makefile | 5 +-
tools/virtiofsd/Makefile.objs | 5 +-
tools/virtiofsd/passthrough_ll.c | 2 +
tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++
tools/virtiofsd/seccomp.h | 14 ++++
5 files changed, 174 insertions(+), 3 deletions(-)
create mode 100644 tools/virtiofsd/seccomp.c
create mode 100644 tools/virtiofsd/seccomp.h
diff --git a/Makefile b/Makefile
index 0e9755d..6879a06 100644
--- a/Makefile
+++ b/Makefile
@@ -330,7 +330,7 @@ endif
endif
endif
-ifdef CONFIG_LINUX
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
HELPERS-y += virtiofsd$(EXESUF)
vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
endif
@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad"
rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
-ifdef CONFIG_LINUX # relies on Linux-specific syscalls
+# relies on Linux-specific syscalls
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
$(call LINK, $^)
endif
diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs
index 45a8075..076f667 100644
--- a/tools/virtiofsd/Makefile.objs
+++ b/tools/virtiofsd/Makefile.objs
@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \
fuse_signals.o \
fuse_virtio.o \
helper.o \
- passthrough_ll.o
+ passthrough_ll.o \
+ seccomp.o
+seccomp.o-cflags := $(SECCOMP_CFLAGS)
+seccomp.o-libs := $(SECCOMP_LIBS)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 0947d14..bd8925b 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -59,6 +59,7 @@
#include <unistd.h>
#include "passthrough_helpers.h"
+#include "seccomp.h"
struct lo_map_elem {
union {
@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
{
setup_namespaces(lo, se);
setup_mounts(lo->source);
+ setup_seccomp();
}
int main(int argc, char *argv[])
diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
new file mode 100644
index 0000000..691fb63
--- /dev/null
+++ b/tools/virtiofsd/seccomp.c
@@ -0,0 +1,151 @@
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "seccomp.h"
+#include "fuse_i.h"
+#include "fuse_log.h"
+#include <errno.h>
+#include <glib.h>
+#include <seccomp.h>
+#include <stdlib.h>
+
+/* Bodge for libseccomp 2.4.2 which broke ppoll */
+#if !defined(__SNR_ppoll) && defined(__SNR_brk)
+#ifdef __NR_ppoll
+#define __SNR_ppoll __NR_ppoll
+#else
+#define __SNR_ppoll __PNR_ppoll
+#endif
+#endif
+
+static const int syscall_whitelist[] = {
+ /* TODO ireg sem*() syscalls */
+ SCMP_SYS(brk),
+ SCMP_SYS(capget), /* For CAP_FSETID */
+ SCMP_SYS(capset),
+ SCMP_SYS(clock_gettime),
+ SCMP_SYS(clone),
+#ifdef __NR_clone3
+ SCMP_SYS(clone3),
+#endif
+ SCMP_SYS(close),
+ SCMP_SYS(copy_file_range),
+ SCMP_SYS(dup),
+ SCMP_SYS(eventfd2),
+ SCMP_SYS(exit),
+ SCMP_SYS(exit_group),
+ SCMP_SYS(fallocate),
+ SCMP_SYS(fchmodat),
+ SCMP_SYS(fchownat),
+ SCMP_SYS(fcntl),
+ SCMP_SYS(fdatasync),
+ SCMP_SYS(fgetxattr),
+ SCMP_SYS(flistxattr),
+ SCMP_SYS(flock),
+ SCMP_SYS(fremovexattr),
+ SCMP_SYS(fsetxattr),
+ SCMP_SYS(fstat),
+ SCMP_SYS(fstatfs),
+ SCMP_SYS(fsync),
+ SCMP_SYS(ftruncate),
+ SCMP_SYS(futex),
+ SCMP_SYS(getdents),
+ SCMP_SYS(getdents64),
+ SCMP_SYS(getegid),
+ SCMP_SYS(geteuid),
+ SCMP_SYS(getpid),
+ SCMP_SYS(gettid),
+ SCMP_SYS(gettimeofday),
+ SCMP_SYS(linkat),
+ SCMP_SYS(lseek),
+ SCMP_SYS(madvise),
+ SCMP_SYS(mkdirat),
+ SCMP_SYS(mknodat),
+ SCMP_SYS(mmap),
+ SCMP_SYS(mprotect),
+ SCMP_SYS(mremap),
+ SCMP_SYS(munmap),
+ SCMP_SYS(newfstatat),
+ SCMP_SYS(open),
+ SCMP_SYS(openat),
+ SCMP_SYS(ppoll),
+ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
+ SCMP_SYS(preadv),
+ SCMP_SYS(pread64),
+ SCMP_SYS(pwritev),
+ SCMP_SYS(pwrite64),
+ SCMP_SYS(read),
+ SCMP_SYS(readlinkat),
+ SCMP_SYS(recvmsg),
+ SCMP_SYS(renameat),
+ SCMP_SYS(renameat2),
+ SCMP_SYS(rt_sigaction),
+ SCMP_SYS(rt_sigprocmask),
+ SCMP_SYS(rt_sigreturn),
+ SCMP_SYS(sendmsg),
+ SCMP_SYS(setresgid),
+ SCMP_SYS(setresuid),
+#ifdef __NR_setresgid32
+ SCMP_SYS(setresgid32),
+#endif
+#ifdef __NR_setresuid32
+ SCMP_SYS(setresuid32),
+#endif
+ SCMP_SYS(set_robust_list),
+ SCMP_SYS(symlinkat),
+ SCMP_SYS(time), /* Rarely needed, except on static builds */
+ SCMP_SYS(tgkill),
+ SCMP_SYS(unlinkat),
+ SCMP_SYS(utimensat),
+ SCMP_SYS(write),
+ SCMP_SYS(writev),
+};
+
+void setup_seccomp(void)
+{
+ scmp_filter_ctx ctx;
+ size_t i;
+
+#ifdef SCMP_ACT_KILL_PROCESS
+ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
+ /* Handle a newer libseccomp but an older kernel */
+ if (!ctx && errno == EOPNOTSUPP) {
+ ctx = seccomp_init(SCMP_ACT_TRAP);
+ }
+#else
+ ctx = seccomp_init(SCMP_ACT_TRAP);
+#endif
+ if (!ctx) {
+ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n");
+ exit(1);
+ }
+
+ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
+ syscall_whitelist[i], 0) != 0) {
+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
+ syscall_whitelist[i]);
+ exit(1);
+ }
+ }
+
+ /* libvhost-user calls this for post-copy migration, we don't need it */
+ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS),
+ SCMP_SYS(userfaultfd), 0) != 0) {
+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n");
+ exit(1);
+ }
+
+ if (seccomp_load(ctx) < 0) {
+ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n");
+ exit(1);
+ }
+
+ seccomp_release(ctx);
+}
diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
new file mode 100644
index 0000000..86bce72
--- /dev/null
+++ b/tools/virtiofsd/seccomp.h
@@ -0,0 +1,14 @@
+/*
+ * Seccomp sandboxing for virtiofsd
+ *
+ * Copyright (C) 2019 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VIRTIOFSD_SECCOMP_H
+#define VIRTIOFSD_SECCOMP_H
+
+void setup_seccomp(void);
+
+#endif /* VIRTIOFSD_SECCOMP_H */
--
1.8.3.1

View File

@ -0,0 +1,74 @@
From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:26 +0100
Subject: [PATCH 115/116] virtiofsd: add some options to the help message
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-112-dgilbert@redhat.com>
Patchwork-id: 93565
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Add the following options to the help message:
- cache
- flock|no_flock
- norace
- posix_lock|no_posix_lock
- readdirplus|no_readdirplus
- timeout
- writeback|no_writeback
- xattr|no_xattr
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
dgilbert: Split cache, norace, posix_lock, readdirplus off
into our own earlier patches that added the options
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index f98d8f2..0801cf7 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -148,6 +148,8 @@ void fuse_cmdline_help(void)
" -o cache=<mode> cache mode. could be one of \"auto, "
"always, none\"\n"
" default: auto\n"
+ " -o flock|no_flock enable/disable flock\n"
+ " default: no_flock\n"
" -o log_level=<level> log level, default to \"info\"\n"
" level could be one of \"debug, "
"info, warn, err\"\n"
@@ -163,7 +165,13 @@ void fuse_cmdline_help(void)
" enable/disable readirplus\n"
" default: readdirplus except with "
"cache=none\n"
- );
+ " -o timeout=<number> I/O timeout (second)\n"
+ " default: depends on cache= option.\n"
+ " -o writeback|no_writeback enable/disable writeback cache\n"
+ " default: no_writeback\n"
+ " -o xattr|no_xattr enable/disable xattr\n"
+ " default: no_xattr\n"
+ );
}
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
--
1.8.3.1

View File

@ -0,0 +1,239 @@
From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:36 +0100
Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-62-dgilbert@redhat.com>
Patchwork-id: 93509
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Sometimes collecting output from stderr is inconvenient or does not fit
within the overall logging architecture. Add syslog(3) support for
cases where stderr cannot be used.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
dgilbert: Reworked as a logging function
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.h | 1 +
tools/virtiofsd/helper.c | 2 ++
tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++---
tools/virtiofsd/seccomp.c | 32 +++++++++++++++++--------
tools/virtiofsd/seccomp.h | 4 +++-
5 files changed, 76 insertions(+), 13 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index 0d61df8..f2750bc 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts {
int show_version;
int show_help;
int print_capabilities;
+ int syslog;
unsigned int max_idle_threads;
};
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 5531425..9692ef9 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = {
FUSE_HELPER_OPT("subtype=", nodefault_subtype),
FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
+ FUSE_HELPER_OPT("--syslog", syslog),
FUSE_OPT_END
};
@@ -138,6 +139,7 @@ void fuse_cmdline_help(void)
" -V --version print version\n"
" --print-capabilities print vhost-user.json\n"
" -d -o debug enable debug output (implies -f)\n"
+ " --syslog log to syslog (default stderr)\n"
" -f foreground operation\n"
" --daemonize run in background\n"
" -o max_idle_threads the maximum number of idle worker "
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index c281d81..0372aca 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -58,6 +58,7 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/xattr.h>
+#include <syslog.h>
#include <unistd.h>
#include "passthrough_helpers.h"
@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = {
{ "norace", offsetof(struct lo_data, norace), 1 },
FUSE_OPT_END
};
+static bool use_syslog = false;
static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source)
* Lock down this process to prevent access to other processes or files outside
* source directory. This reduces the impact of arbitrary code execution bugs.
*/
-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
+ bool enable_syslog)
{
setup_namespaces(lo, se);
setup_mounts(lo->source);
- setup_seccomp();
+ setup_seccomp(enable_syslog);
}
/* Raise the maximum number of open file descriptors */
@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void)
}
}
+static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
+{
+ if (use_syslog) {
+ int priority = LOG_ERR;
+ switch (level) {
+ case FUSE_LOG_EMERG:
+ priority = LOG_EMERG;
+ break;
+ case FUSE_LOG_ALERT:
+ priority = LOG_ALERT;
+ break;
+ case FUSE_LOG_CRIT:
+ priority = LOG_CRIT;
+ break;
+ case FUSE_LOG_ERR:
+ priority = LOG_ERR;
+ break;
+ case FUSE_LOG_WARNING:
+ priority = LOG_WARNING;
+ break;
+ case FUSE_LOG_NOTICE:
+ priority = LOG_NOTICE;
+ break;
+ case FUSE_LOG_INFO:
+ priority = LOG_INFO;
+ break;
+ case FUSE_LOG_DEBUG:
+ priority = LOG_DEBUG;
+ break;
+ }
+ vsyslog(priority, fmt, ap);
+ } else {
+ vfprintf(stderr, fmt, ap);
+ }
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[])
if (fuse_parse_cmdline(&args, &opts) != 0) {
return 1;
}
+ fuse_set_log_func(log_func);
+ use_syslog = opts.syslog;
+ if (use_syslog) {
+ openlog("virtiofsd", LOG_PID, LOG_DAEMON);
+ }
if (opts.show_help) {
printf("usage: %s [options]\n\n", argv[0]);
fuse_cmdline_help();
@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[])
/* Must be before sandbox since it wants /proc */
setup_capng();
- setup_sandbox(&lo, se);
+ setup_sandbox(&lo, se, opts.syslog);
/* Block until ctrl+c or fusermount -u */
ret = virtio_loop(se);
diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c
index 691fb63..2d9d4a7 100644
--- a/tools/virtiofsd/seccomp.c
+++ b/tools/virtiofsd/seccomp.c
@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = {
SCMP_SYS(writev),
};
-void setup_seccomp(void)
+/* Syscalls used when --syslog is enabled */
+static const int syscall_whitelist_syslog[] = {
+ SCMP_SYS(sendto),
+};
+
+static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len)
{
- scmp_filter_ctx ctx;
size_t i;
+ for (i = 0; i < len; i++) {
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) {
+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n",
+ syscalls[i]);
+ exit(1);
+ }
+ }
+}
+
+void setup_seccomp(bool enable_syslog)
+{
+ scmp_filter_ctx ctx;
+
#ifdef SCMP_ACT_KILL_PROCESS
ctx = seccomp_init(SCMP_ACT_KILL_PROCESS);
/* Handle a newer libseccomp but an older kernel */
@@ -126,13 +143,10 @@ void setup_seccomp(void)
exit(1);
}
- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) {
- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW,
- syscall_whitelist[i], 0) != 0) {
- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d",
- syscall_whitelist[i]);
- exit(1);
- }
+ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist));
+ if (enable_syslog) {
+ add_whitelist(ctx, syscall_whitelist_syslog,
+ G_N_ELEMENTS(syscall_whitelist_syslog));
}
/* libvhost-user calls this for post-copy migration, we don't need it */
diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h
index 86bce72..d47c8ea 100644
--- a/tools/virtiofsd/seccomp.h
+++ b/tools/virtiofsd/seccomp.h
@@ -9,6 +9,8 @@
#ifndef VIRTIOFSD_SECCOMP_H
#define VIRTIOFSD_SECCOMP_H
-void setup_seccomp(void);
+#include <stdbool.h>
+
+void setup_seccomp(bool enable_syslog);
#endif /* VIRTIOFSD_SECCOMP_H */
--
1.8.3.1

View File

@ -0,0 +1,106 @@
From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:22 +0100
Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-108-dgilbert@redhat.com>
Patchwork-id: 93561
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Add an option to control the size of the thread pool. Requests are now
processed in parallel by default.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 1 +
tools/virtiofsd/fuse_lowlevel.c | 7 ++++++-
tools/virtiofsd/fuse_virtio.c | 5 +++--
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index 1447d86..4e47e58 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -72,6 +72,7 @@ struct fuse_session {
int vu_listen_fd;
int vu_socketfd;
struct fv_VuDev *virtio_dev;
+ int thread_pool_size;
};
struct fuse_chan {
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 79a4031..de2e2e0 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -28,6 +28,7 @@
#include <sys/file.h>
#include <unistd.h>
+#define THREAD_POOL_SIZE 64
#define OFFSET_MAX 0x7fffffffffffffffLL
@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = {
LL_OPTION("allow_root", deny_others, 1),
LL_OPTION("--socket-path=%s", vu_socket_path, 0),
LL_OPTION("--fd=%d", vu_listen_fd, 0),
+ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0),
FUSE_OPT_END
};
@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void)
printf(
" -o allow_root allow access by root\n"
" --socket-path=PATH path for the vhost-user socket\n"
- " --fd=FDNUM fd number of vhost-user socket\n");
+ " --fd=FDNUM fd number of vhost-user socket\n"
+ " --thread-pool-size=NUM thread pool size limit (default %d)\n",
+ THREAD_POOL_SIZE);
}
void fuse_session_destroy(struct fuse_session *se)
@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
}
se->fd = -1;
se->vu_listen_fd = -1;
+ se->thread_pool_size = THREAD_POOL_SIZE;
se->conn.max_write = UINT_MAX;
se->conn.max_readahead = UINT_MAX;
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 0dcf2ef..9f65823 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque)
struct fv_QueueInfo *qi = opaque;
struct VuDev *dev = &qi->virtio_dev->dev;
struct VuVirtq *q = vu_get_queue(dev, qi->qidx);
+ struct fuse_session *se = qi->virtio_dev->se;
GThreadPool *pool;
- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */,
- TRUE, NULL);
+ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE,
+ NULL);
if (!pool) {
fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__);
return NULL;
--
1.8.3.1

View File

@ -0,0 +1,73 @@
From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:09 +0100
Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-35-dgilbert@redhat.com>
Patchwork-id: 93490
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Install a vhost-user.json file describing virtiofsd. This allows
libvirt and other management tools to enumerate vhost-user backend
programs.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
.gitignore | 1 +
Makefile | 1 +
tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++
3 files changed, 7 insertions(+)
create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in
diff --git a/.gitignore b/.gitignore
index aefad32..d7a4f99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
/config-target.*
/config.status
/config-temp
+/tools/virtiofsd/50-qemu-virtiofsd.json
/elf2dmp
/trace-events-all
/trace/generated-events.h
diff --git a/Makefile b/Makefile
index 1526775..0e9755d 100644
--- a/Makefile
+++ b/Makefile
@@ -332,6 +332,7 @@ endif
ifdef CONFIG_LINUX
HELPERS-y += virtiofsd$(EXESUF)
+vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
endif
# Sphinx does not allow building manuals into the same directory as
diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in
new file mode 100644
index 0000000..9bcd86f
--- /dev/null
+++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in
@@ -0,0 +1,5 @@
+{
+ "description": "QEMU virtiofsd vhost-user-fs",
+ "type": "fs",
+ "binary": "@libexecdir@/virtiofsd"
+}
--
1.8.3.1

View File

@ -0,0 +1,175 @@
From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:32 +0100
Subject: [PATCH 061/116] virtiofsd: cap-ng helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-58-dgilbert@redhat.com>
Patchwork-id: 93512
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's
sandboxing doesn't have /proc mounted; thus we have to do the
caps read before we sandbox it and save/restore the state.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
Makefile | 4 +--
tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 6879a06..ff05c30 100644
--- a/Makefile
+++ b/Makefile
@@ -330,7 +330,7 @@ endif
endif
endif
-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
HELPERS-y += virtiofsd$(EXESUF)
vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json
endif
@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
$(call LINK, $^)
# relies on Linux-specific syscalls
-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy)
+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy)
virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS)
$(call LINK, $^)
endif
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index bd8925b..97e7c75 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -39,6 +39,7 @@
#include "fuse_virtio.h"
#include "fuse_lowlevel.h"
#include <assert.h>
+#include <cap-ng.h>
#include <dirent.h>
#include <errno.h>
#include <inttypes.h>
@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = {
static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+static struct {
+ pthread_mutex_t mutex;
+ void *saved;
+} cap;
+/* That we loaded cap-ng in the current thread from the saved */
+static __thread bool cap_loaded = 0;
+
static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
static int is_dot_or_dotdot(const char *name)
@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req)
return (struct lo_data *)fuse_req_userdata(req);
}
+/*
+ * Load capng's state from our saved state if the current thread
+ * hadn't previously been loaded.
+ * returns 0 on success
+ */
+static int load_capng(void)
+{
+ if (!cap_loaded) {
+ pthread_mutex_lock(&cap.mutex);
+ capng_restore_state(&cap.saved);
+ /*
+ * restore_state free's the saved copy
+ * so make another.
+ */
+ cap.saved = capng_save_state();
+ if (!cap.saved) {
+ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n");
+ return -EINVAL;
+ }
+ pthread_mutex_unlock(&cap.mutex);
+
+ /*
+ * We want to use the loaded state for our pid,
+ * not the original
+ */
+ capng_setpid(syscall(SYS_gettid));
+ cap_loaded = true;
+ }
+ return 0;
+}
+
static void lo_map_init(struct lo_map *map)
{
map->elems = NULL;
@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
}
/*
+ * Capture the capability state, we'll need to restore this for individual
+ * threads later; see load_capng.
+ */
+static void setup_capng(void)
+{
+ /* Note this accesses /proc so has to happen before the sandbox */
+ if (capng_get_caps_process()) {
+ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n");
+ exit(1);
+ }
+ pthread_mutex_init(&cap.mutex, NULL);
+ pthread_mutex_lock(&cap.mutex);
+ cap.saved = capng_save_state();
+ if (!cap.saved) {
+ fuse_log(FUSE_LOG_ERR, "capng_save_state\n");
+ exit(1);
+ }
+ pthread_mutex_unlock(&cap.mutex);
+}
+
+static void cleanup_capng(void)
+{
+ free(cap.saved);
+ cap.saved = NULL;
+ pthread_mutex_destroy(&cap.mutex);
+}
+
+
+/*
* Make the source directory our root so symlinks cannot escape and no other
* files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
*/
@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[])
fuse_daemonize(opts.foreground);
+ /* Must be before sandbox since it wants /proc */
+ setup_capng();
+
setup_sandbox(&lo, se);
/* Block until ctrl+c or fusermount -u */
ret = virtio_loop(se);
fuse_session_unmount(se);
+ cleanup_capng();
err_out3:
fuse_remove_signal_handlers(se);
err_out2:
--
1.8.3.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,82 @@
From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:56 +0100
Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-82-dgilbert@redhat.com>
Patchwork-id: 93533
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
This cleans up unfreed resources in se on quitting, including
se->virtio_dev, se->vu_socket_path, se->vu_socketfd.
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 7 +++++++
tools/virtiofsd/fuse_virtio.c | 7 +++++++
tools/virtiofsd/fuse_virtio.h | 2 +-
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 65f91da..440508a 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se)
if (se->fd != -1) {
close(se->fd);
}
+
+ if (se->vu_socket_path) {
+ virtio_session_close(se);
+ free(se->vu_socket_path);
+ se->vu_socket_path = NULL;
+ }
+
free(se);
}
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 7a8774a..e7bd772 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se)
return 0;
}
+
+void virtio_session_close(struct fuse_session *se)
+{
+ close(se->vu_socketfd);
+ free(se->virtio_dev);
+ se->virtio_dev = NULL;
+}
diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h
index cc676b9..1116840 100644
--- a/tools/virtiofsd/fuse_virtio.h
+++ b/tools/virtiofsd/fuse_virtio.h
@@ -19,7 +19,7 @@
struct fuse_session;
int virtio_session_mount(struct fuse_session *se);
-
+void virtio_session_close(struct fuse_session *se);
int virtio_loop(struct fuse_session *se);
--
1.8.3.1

View File

@ -0,0 +1,99 @@
From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:13 +0100
Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use
fuse log infra
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-99-dgilbert@redhat.com>
Patchwork-id: 93552
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Eryu Guan <eguan@linux.alibaba.com>
Signed-off-by: Eryu Guan <eguan@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_signals.c | 7 +++++--
tools/virtiofsd/helper.c | 9 ++++++---
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c
index dc7c8ac..f18625b 100644
--- a/tools/virtiofsd/fuse_signals.c
+++ b/tools/virtiofsd/fuse_signals.c
@@ -12,6 +12,7 @@
#include "fuse_i.h"
#include "fuse_lowlevel.h"
+#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove)
sa.sa_flags = 0;
if (sigaction(sig, NULL, &old_sa) == -1) {
- perror("fuse: cannot get old signal handler");
+ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n",
+ strerror(errno));
return -1;
}
if (old_sa.sa_handler == (remove ? handler : SIG_DFL) &&
sigaction(sig, &sa, NULL) == -1) {
- perror("fuse: cannot set signal handler");
+ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n",
+ strerror(errno));
return -1;
}
return 0;
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 33749bf..f98d8f2 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground)
char completed;
if (pipe(waiter)) {
- perror("fuse_daemonize: pipe");
+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n",
+ strerror(errno));
return -1;
}
@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground)
*/
switch (fork()) {
case -1:
- perror("fuse_daemonize: fork");
+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n",
+ strerror(errno));
return -1;
case 0:
break;
@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground)
}
if (setsid() == -1) {
- perror("fuse_daemonize: setsid");
+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n",
+ strerror(errno));
return -1;
}
--
1.8.3.1

View File

@ -0,0 +1,57 @@
From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:12 +0100
Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-98-dgilbert@redhat.com>
Patchwork-id: 93551
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Peng Tao <tao.peng@linux.alibaba.com>
Right now we always enable it regardless of given commandlines.
Fix it by setting the flag relying on the lo->flock bit.
Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index ab16135..ccbbec1 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
conn->want |= FUSE_CAP_WRITEBACK_CACHE;
}
- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
- conn->want |= FUSE_CAP_FLOCK_LOCKS;
+ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) {
+ if (lo->flock) {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
+ conn->want |= FUSE_CAP_FLOCK_LOCKS;
+ } else {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n");
+ conn->want &= ~FUSE_CAP_FLOCK_LOCKS;
+ }
}
if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
--
1.8.3.1

View File

@ -0,0 +1,47 @@
From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:00 +0100
Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-86-dgilbert@redhat.com>
Patchwork-id: 93539
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
lookup is a read-only operation, so PARALLEL_DIROPS can be enabled.
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index aac282f..70568d2 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (se->conn.want & FUSE_CAP_ASYNC_READ) {
outarg.flags |= FUSE_ASYNC_READ;
}
+ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) {
+ outarg.flags |= FUSE_PARALLEL_DIROPS;
+ }
if (se->conn.want & FUSE_CAP_POSIX_LOCKS) {
outarg.flags |= FUSE_POSIX_LOCKS;
}
--
1.8.3.1

View File

@ -0,0 +1,111 @@
From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:50 +0100
Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-76-dgilbert@redhat.com>
Patchwork-id: 93527
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
Initialize the root inode in a single place.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
dgilbert:
with fix suggested by Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++----------
1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 33bfb4d..9e7191e 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
}
}
+static void setup_root(struct lo_data *lo, struct lo_inode *root)
+{
+ int fd, res;
+ struct stat stat;
+
+ fd = open("/", O_PATH);
+ if (fd == -1) {
+ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source);
+ exit(1);
+ }
+
+ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+ if (res == -1) {
+ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source);
+ exit(1);
+ }
+
+ root->is_symlink = false;
+ root->fd = fd;
+ root->ino = stat.st_ino;
+ root->dev = stat.st_dev;
+ root->refcount = 2;
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[])
if (lo.debug) {
current_log_level = FUSE_LOG_DEBUG;
}
- lo.root.refcount = 2;
-
if (lo.source) {
struct stat stat;
int res;
@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[])
} else {
lo.source = "/";
}
- lo.root.is_symlink = false;
if (!lo.timeout_set) {
switch (lo.cache) {
case CACHE_NEVER:
@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[])
exit(1);
}
- lo.root.fd = open(lo.source, O_PATH);
-
- if (lo.root.fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source);
- exit(1);
- }
-
se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
if (se == NULL) {
goto err_out1;
@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[])
setup_sandbox(&lo, se, opts.syslog);
+ setup_root(&lo, &lo.root);
/* Block until ctrl+c or fusermount -u */
ret = virtio_loop(se);
--
1.8.3.1

View File

@ -0,0 +1,85 @@
From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:49 +0100
Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in
lo_do_lookup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-75-dgilbert@redhat.com>
Patchwork-id: 93529
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
The Linux file handle APIs (struct export_operations) can access inodes
that are not attached to parents because path name traversal is not
performed. Refuse if there is no parent in lo_do_lookup().
Also clean up lo_do_lookup() while we're here.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index de12e75..33bfb4d 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
struct lo_data *lo = lo_data(req);
struct lo_inode *inode, *dir = lo_inode(req, parent);
+ /*
+ * name_to_handle_at() and open_by_handle_at() can reach here with fuse
+ * mount point in guest, but we don't have its inode info in the
+ * ino_map.
+ */
+ if (!dir) {
+ return ENOENT;
+ }
+
memset(e, 0, sizeof(*e));
e->attr_timeout = lo->timeout;
e->entry_timeout = lo->timeout;
@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
name = ".";
}
- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW);
+ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
if (newfd == -1) {
goto out_err;
}
@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out_err;
}
- inode = lo_find(lo_data(req), &e->attr);
+ inode = lo_find(lo, &e->attr);
if (inode) {
close(newfd);
newfd = -1;
@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
inode->is_symlink = S_ISLNK(e->attr.st_mode);
inode->refcount = 1;
inode->fd = newfd;
+ newfd = -1;
inode->ino = e->attr.st_ino;
inode->dev = e->attr.st_dev;
--
1.8.3.1

View File

@ -0,0 +1,63 @@
From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:55 +0100
Subject: [PATCH 084/116] virtiofsd: fix error handling in main()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-81-dgilbert@redhat.com>
Patchwork-id: 93534
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place
to do cleanup.
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9ed77a1..af050c6 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[])
lo_map_init(&lo.fd_map);
if (fuse_parse_cmdline(&args, &opts) != 0) {
- return 1;
+ goto err_out1;
}
fuse_set_log_func(log_func);
use_syslog = opts.syslog;
if (use_syslog) {
openlog("virtiofsd", LOG_PID, LOG_DAEMON);
}
+
if (opts.show_help) {
printf("usage: %s [options]\n\n", argv[0]);
fuse_cmdline_help();
@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[])
}
if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
- return 1;
+ goto err_out1;
}
/*
--
1.8.3.1

View File

@ -0,0 +1,44 @@
From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:01 +0100
Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in
lo_do_lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-87-dgilbert@redhat.com>
Patchwork-id: 93543
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Eric Ren <renzhen@linux.alibaba.com>
Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e8dc5c7..05b5f89 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
close(newfd);
newfd = -1;
} else {
- saverr = ENOMEM;
inode = calloc(1, sizeof(struct lo_inode));
if (!inode) {
goto out_err;
--
1.8.3.1

View File

@ -0,0 +1,322 @@
From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:35 +0100
Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-61-dgilbert@redhat.com>
Patchwork-id: 93515
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Some FUSE message replies contain padding fields that are not
initialized by libfuse. This is fine in traditional FUSE applications
because the kernel is trusted. virtiofsd does not trust the guest and
must not expose uninitialized memory.
Use C struct initializers to automatically zero out memory. Not all of
these code changes are strictly necessary but they will prevent future
information leaks if the structs are extended.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++--------------------
1 file changed, 76 insertions(+), 74 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 2d6dc5a..6ceb33d 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void)
static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr)
{
- attr->ino = stbuf->st_ino;
- attr->mode = stbuf->st_mode;
- attr->nlink = stbuf->st_nlink;
- attr->uid = stbuf->st_uid;
- attr->gid = stbuf->st_gid;
- attr->rdev = stbuf->st_rdev;
- attr->size = stbuf->st_size;
- attr->blksize = stbuf->st_blksize;
- attr->blocks = stbuf->st_blocks;
- attr->atime = stbuf->st_atime;
- attr->mtime = stbuf->st_mtime;
- attr->ctime = stbuf->st_ctime;
- attr->atimensec = ST_ATIM_NSEC(stbuf);
- attr->mtimensec = ST_MTIM_NSEC(stbuf);
- attr->ctimensec = ST_CTIM_NSEC(stbuf);
+ *attr = (struct fuse_attr){
+ .ino = stbuf->st_ino,
+ .mode = stbuf->st_mode,
+ .nlink = stbuf->st_nlink,
+ .uid = stbuf->st_uid,
+ .gid = stbuf->st_gid,
+ .rdev = stbuf->st_rdev,
+ .size = stbuf->st_size,
+ .blksize = stbuf->st_blksize,
+ .blocks = stbuf->st_blocks,
+ .atime = stbuf->st_atime,
+ .mtime = stbuf->st_mtime,
+ .ctime = stbuf->st_ctime,
+ .atimensec = ST_ATIM_NSEC(stbuf),
+ .mtimensec = ST_MTIM_NSEC(stbuf),
+ .ctimensec = ST_CTIM_NSEC(stbuf),
+ };
}
static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf)
@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
int count)
{
- struct fuse_out_header out;
+ struct fuse_out_header out = {
+ .unique = req->unique,
+ .error = error,
+ };
if (error <= -1000 || error > 0) {
fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error);
error = -ERANGE;
}
- out.unique = req->unique;
- out.error = error;
-
iov[0].iov_base = &out;
iov[0].iov_len = sizeof(struct fuse_out_header);
@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize,
static void convert_statfs(const struct statvfs *stbuf,
struct fuse_kstatfs *kstatfs)
{
- kstatfs->bsize = stbuf->f_bsize;
- kstatfs->frsize = stbuf->f_frsize;
- kstatfs->blocks = stbuf->f_blocks;
- kstatfs->bfree = stbuf->f_bfree;
- kstatfs->bavail = stbuf->f_bavail;
- kstatfs->files = stbuf->f_files;
- kstatfs->ffree = stbuf->f_ffree;
- kstatfs->namelen = stbuf->f_namemax;
+ *kstatfs = (struct fuse_kstatfs){
+ .bsize = stbuf->f_bsize,
+ .frsize = stbuf->f_frsize,
+ .blocks = stbuf->f_blocks,
+ .bfree = stbuf->f_bfree,
+ .bavail = stbuf->f_bavail,
+ .files = stbuf->f_files,
+ .ffree = stbuf->f_ffree,
+ .namelen = stbuf->f_namemax,
+ };
}
static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize)
@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t)
static void fill_entry(struct fuse_entry_out *arg,
const struct fuse_entry_param *e)
{
- arg->nodeid = e->ino;
- arg->generation = e->generation;
- arg->entry_valid = calc_timeout_sec(e->entry_timeout);
- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout);
- arg->attr_valid = calc_timeout_sec(e->attr_timeout);
- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout);
+ *arg = (struct fuse_entry_out){
+ .nodeid = e->ino,
+ .generation = e->generation,
+ .entry_valid = calc_timeout_sec(e->entry_timeout),
+ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout),
+ .attr_valid = calc_timeout_sec(e->attr_timeout),
+ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout),
+ };
convert_stat(&e->attr, &arg->attr);
}
@@ -362,10 +368,12 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize,
fill_entry(&dp->entry_out, e);
struct fuse_dirent *dirent = &dp->dirent;
- dirent->ino = e->attr.st_ino;
- dirent->off = off;
- dirent->namelen = namelen;
- dirent->type = (e->attr.st_mode & S_IFMT) >> 12;
+ *dirent = (struct fuse_dirent){
+ .ino = e->attr.st_ino,
+ .off = off,
+ .namelen = namelen,
+ .type = (e->attr.st_mode & S_IFMT) >> 12,
+ };
memcpy(dirent->name, name, namelen);
memset(dirent->name + namelen, 0, entlen_padded - entlen);
@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv)
{
struct iovec iov[2];
- struct fuse_out_header out;
+ struct fuse_out_header out = {
+ .unique = req->unique,
+ };
int res;
iov[0].iov_base = &out;
iov[0].iov_len = sizeof(struct fuse_out_header);
- out.unique = req->unique;
- out.error = 0;
-
res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv);
if (res <= 0) {
fuse_free_req(req);
@@ -2145,14 +2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid,
static int send_notify_iov(struct fuse_session *se, int notify_code,
struct iovec *iov, int count)
{
- struct fuse_out_header out;
+ struct fuse_out_header out = {
+ .error = notify_code,
+ };
if (!se->got_init) {
return -ENOTCONN;
}
- out.unique = 0;
- out.error = notify_code;
iov[0].iov_base = &out;
iov[0].iov_len = sizeof(struct fuse_out_header);
@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code,
int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
{
if (ph != NULL) {
- struct fuse_notify_poll_wakeup_out outarg;
+ struct fuse_notify_poll_wakeup_out outarg = {
+ .kh = ph->kh,
+ };
struct iovec iov[2];
- outarg.kh = ph->kh;
-
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
@@ -2179,17 +2186,17 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph)
int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
off_t off, off_t len)
{
- struct fuse_notify_inval_inode_out outarg;
+ struct fuse_notify_inval_inode_out outarg = {
+ .ino = ino,
+ .off = off,
+ .len = len,
+ };
struct iovec iov[2];
if (!se) {
return -EINVAL;
}
- outarg.ino = ino;
- outarg.off = off;
- outarg.len = len;
-
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino,
int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent,
const char *name, size_t namelen)
{
- struct fuse_notify_inval_entry_out outarg;
+ struct fuse_notify_inval_entry_out outarg = {
+ .parent = parent,
+ .namelen = namelen,
+ };
struct iovec iov[3];
if (!se) {
return -EINVAL;
}
- outarg.parent = parent;
- outarg.namelen = namelen;
- outarg.padding = 0;
-
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
iov[2].iov_base = (void *)name;
@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
fuse_ino_t child, const char *name,
size_t namelen)
{
- struct fuse_notify_delete_out outarg;
+ struct fuse_notify_delete_out outarg = {
+ .parent = parent,
+ .child = child,
+ .namelen = namelen,
+ };
struct iovec iov[3];
if (!se) {
return -EINVAL;
}
- outarg.parent = parent;
- outarg.child = child;
- outarg.namelen = namelen;
- outarg.padding = 0;
-
iov[1].iov_base = &outarg;
iov[1].iov_len = sizeof(outarg);
iov[2].iov_base = (void *)name;
@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent,
int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino,
off_t offset, struct fuse_bufvec *bufv)
{
- struct fuse_out_header out;
- struct fuse_notify_store_out outarg;
+ struct fuse_out_header out = {
+ .error = FUSE_NOTIFY_STORE,
+ };
+ struct fuse_notify_store_out outarg = {
+ .nodeid = ino,
+ .offset = offset,
+ .size = fuse_buf_size(bufv),
+ };
struct iovec iov[3];
- size_t size = fuse_buf_size(bufv);
int res;
if (!se) {
return -EINVAL;
}
- out.unique = 0;
- out.error = FUSE_NOTIFY_STORE;
-
- outarg.nodeid = ino;
- outarg.offset = offset;
- outarg.size = size;
- outarg.padding = 0;
-
iov[0].iov_base = &out;
iov[0].iov_len = sizeof(out);
iov[1].iov_base = &outarg;
--
1.8.3.1

View File

@ -0,0 +1,94 @@
From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:21 +0100
Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-107-dgilbert@redhat.com>
Patchwork-id: 93560
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Now that lo_destroy() is serialized we can call unref_inode() so that
all inode resources are freed.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 79b8b71..eb001b9 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
}
}
-static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
-{
- struct lo_inode *inode = value;
- struct lo_data *lo = user_data;
-
- inode->nlookup = 0;
- lo_map_remove(&lo->ino_map, inode->fuse_ino);
- close(inode->fd);
- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
-
- return TRUE;
-}
-
-static void unref_all_inodes(struct lo_data *lo)
-{
- pthread_mutex_lock(&lo->mutex);
- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo);
- pthread_mutex_unlock(&lo->mutex);
-}
-
static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
struct lo_data *lo = lo_data(req);
@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
static void lo_destroy(void *userdata)
{
struct lo_data *lo = (struct lo_data *)userdata;
- unref_all_inodes(lo);
+
+ /*
+ * Normally lo->mutex must be taken when traversing lo->inodes but
+ * lo_destroy() is a serialized request so no races are possible here.
+ *
+ * In addition, we cannot acquire lo->mutex since unref_inode() takes it
+ * too and this would result in a recursive lock.
+ */
+ while (true) {
+ GHashTableIter iter;
+ gpointer key, value;
+
+ g_hash_table_iter_init(&iter, lo->inodes);
+ if (!g_hash_table_iter_next(&iter, &key, &value)) {
+ break;
+ }
+
+ struct lo_inode *inode = value;
+ unref_inode_lolocked(lo, inode, inode->nlookup);
+ }
}
static struct fuse_lowlevel_ops lo_oper = {
--
1.8.3.1

View File

@ -0,0 +1,66 @@
From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:57 +0100
Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-83-dgilbert@redhat.com>
Patchwork-id: 93536
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Liu Bo <bo.liu@linux.alibaba.com>
valgrind reported that lo.source is leaked on quitting, but it was defined
as (const char*) as it may point to a const string "/".
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index af050c6..056ebe8 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -115,7 +115,7 @@ struct lo_data {
int writeback;
int flock;
int xattr;
- const char *source;
+ char *source;
double timeout;
int cache;
int timeout_set;
@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[])
fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
exit(1);
}
-
} else {
- lo.source = "/";
+ lo.source = strdup("/");
}
if (!lo.timeout_set) {
switch (lo.cache) {
@@ -2570,5 +2569,7 @@ err_out1:
close(lo.root.fd);
}
+ free(lo.source);
+
return ret ? 1 : 0;
}
--
1.8.3.1

View File

@ -0,0 +1,66 @@
From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:00:59 +0100
Subject: [PATCH 028/116] virtiofsd: get/set features callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-25-dgilbert@redhat.com>
Patchwork-id: 93478
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Add the get/set features callbacks.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 1928a20..4819e56 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -46,6 +46,17 @@ struct virtio_fs_config {
uint32_t num_queues;
};
+/* Callback from libvhost-user */
+static uint64_t fv_get_features(VuDev *dev)
+{
+ return 1ULL << VIRTIO_F_VERSION_1;
+}
+
+/* Callback from libvhost-user */
+static void fv_set_features(VuDev *dev, uint64_t features)
+{
+}
+
/*
* Callback from libvhost-user if there's a new fd we're supposed to listen
* to, typically a queue kick?
@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx)
}
static const VuDevIface fv_iface = {
- /* TODO: Add other callbacks */
+ .get_features = fv_get_features,
+ .set_features = fv_set_features,
+
.queue_is_processed_in_order = fv_queue_order,
};
--
1.8.3.1

View File

@ -0,0 +1,589 @@
From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:11 +0100
Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent
use-after-free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-97-dgilbert@redhat.com>
Patchwork-id: 93550
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
If thread A is using an inode it must not be deleted by thread B when
processing a FUSE_FORGET request.
The FUSE protocol itself already has a counter called nlookup that is
used in FUSE_FORGET messages. We cannot trust this counter since the
untrusted client can manipulate it via FUSE_FORGET messages.
Introduce a new refcount to keep inodes alive for the required lifespan.
lo_inode_put() must be called to release a reference. FUSE's nlookup
counter holds exactly one reference so that the inode stays alive as
long as the client still wants to remember it.
Note that the lo_inode->is_symlink field is moved to avoid creating a
hole in the struct due to struct field alignment.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------
1 file changed, 146 insertions(+), 23 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e3a6d6b..ab16135 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -97,7 +97,13 @@ struct lo_key {
struct lo_inode {
int fd;
- bool is_symlink;
+
+ /*
+ * Atomic reference count for this object. The nlookup field holds a
+ * reference and release it when nlookup reaches 0.
+ */
+ gint refcount;
+
struct lo_key key;
/*
@@ -116,6 +122,8 @@ struct lo_inode {
fuse_ino_t fuse_ino;
pthread_mutex_t plock_mutex;
GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */
+
+ bool is_symlink;
};
struct lo_cred {
@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
return elem - lo_data(req)->ino_map.elems;
}
+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
+{
+ struct lo_inode *inode = *inodep;
+
+ if (!inode) {
+ return;
+ }
+
+ *inodep = NULL;
+
+ if (g_atomic_int_dec_and_test(&inode->refcount)) {
+ close(inode->fd);
+ free(inode);
+ }
+}
+
+/* Caller must release refcount using lo_inode_put() */
static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
{
struct lo_data *lo = lo_data(req);
@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
pthread_mutex_lock(&lo->mutex);
elem = lo_map_get(&lo->ino_map, ino);
+ if (elem) {
+ g_atomic_int_inc(&elem->inode->refcount);
+ }
pthread_mutex_unlock(&lo->mutex);
if (!elem) {
@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
return elem->inode;
}
+/*
+ * TODO Remove this helper and force callers to hold an inode refcount until
+ * they are done with the fd. This will be done in a later patch to make
+ * review easier.
+ */
static int lo_fd(fuse_req_t req, fuse_ino_t ino)
{
struct lo_inode *inode = lo_inode(req, ino);
- return inode ? inode->fd : -1;
+ int fd;
+
+ if (!inode) {
+ return -1;
+ }
+
+ fd = inode->fd;
+ lo_inode_put(lo_data(req), &inode);
+ return fd;
}
static void lo_init(void *userdata, struct fuse_conn_info *conn)
@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout);
}
+/*
+ * Increments parent->nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
char path[PATH_MAX], struct lo_inode **parent)
{
@@ -582,6 +627,7 @@ retry:
p = &lo->root;
pthread_mutex_lock(&lo->mutex);
p->nlookup++;
+ g_atomic_int_inc(&p->refcount);
pthread_mutex_unlock(&lo->mutex);
} else {
*last = '\0';
@@ -625,6 +671,7 @@ retry:
fail_unref:
unref_inode_lolocked(lo, p, 1);
+ lo_inode_put(lo, &p);
fail:
if (retries) {
retries--;
@@ -663,6 +710,7 @@ fallback:
if (res != -1) {
res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
unref_inode_lolocked(lo, parent, 1);
+ lo_inode_put(lo, &parent);
}
return res;
@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
goto out_err;
}
}
+ lo_inode_put(lo, &inode);
return lo_getattr(req, ino, fi);
out_err:
saverr = errno;
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
if (p) {
assert(p->nlookup > 0);
p->nlookup++;
+ g_atomic_int_inc(&p->refcount);
}
pthread_mutex_unlock(&lo->mutex);
@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data)
free(plock);
}
+/*
+ * Increments nlookup and caller must release refcount using
+ * lo_inode_put(&parent).
+ */
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
struct fuse_entry_param *e)
{
@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
int res;
int saverr;
struct lo_data *lo = lo_data(req);
- struct lo_inode *inode, *dir = lo_inode(req, parent);
+ struct lo_inode *inode = NULL;
+ struct lo_inode *dir = lo_inode(req, parent);
/*
* name_to_handle_at() and open_by_handle_at() can reach here with fuse
@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
}
inode->is_symlink = S_ISLNK(e->attr.st_mode);
+
+ /*
+ * One for the caller and one for nlookup (released in
+ * unref_inode_lolocked())
+ */
+ g_atomic_int_set(&inode->refcount, 2);
+
inode->nlookup = 1;
inode->fd = newfd;
newfd = -1;
@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
}
e->ino = inode->fuse_ino;
+ lo_inode_put(lo, &inode);
+ lo_inode_put(lo, &dir);
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
name, (unsigned long long)e->ino);
@@ -894,6 +959,8 @@ out_err:
if (newfd != -1) {
close(newfd);
}
+ lo_inode_put(lo, &inode);
+ lo_inode_put(lo, &dir);
return saverr;
}
@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
{
int res;
int saverr;
+ struct lo_data *lo = lo_data(req);
struct lo_inode *dir;
struct fuse_entry_param e;
struct lo_cred old = {};
@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
+ lo_inode_put(lo, &dir);
return;
out:
+ lo_inode_put(lo, &dir);
fuse_reply_err(req, saverr);
}
@@ -1085,6 +1155,7 @@ fallback:
if (res != -1) {
res = linkat(parent->fd, path, dfd, name, 0);
unref_inode_lolocked(lo, parent, 1);
+ lo_inode_put(lo, &parent);
}
return res;
@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
{
int res;
struct lo_data *lo = lo_data(req);
+ struct lo_inode *parent_inode;
struct lo_inode *inode;
struct fuse_entry_param e;
int saverr;
@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
return;
}
+ parent_inode = lo_inode(req, parent);
inode = lo_inode(req, ino);
- if (!inode) {
- fuse_reply_err(req, EBADF);
- return;
+ if (!parent_inode || !inode) {
+ errno = EBADF;
+ goto out_err;
}
memset(&e, 0, sizeof(struct fuse_entry_param));
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
+ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name);
if (res == -1) {
goto out_err;
}
@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
+/* Increments nlookup and caller must release refcount using lo_inode_put() */
static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
const char *name)
{
@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
+ lo_inode_put(lo, &inode);
}
static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
unsigned int flags)
{
int res;
- struct lo_inode *oldinode;
- struct lo_inode *newinode;
+ struct lo_inode *parent_inode;
+ struct lo_inode *newparent_inode;
+ struct lo_inode *oldinode = NULL;
+ struct lo_inode *newinode = NULL;
struct lo_data *lo = lo_data(req);
if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
+ parent_inode = lo_inode(req, parent);
+ newparent_inode = lo_inode(req, newparent);
+ if (!parent_inode || !newparent_inode) {
+ fuse_reply_err(req, EBADF);
+ goto out;
+ }
+
oldinode = lookup_name(req, parent, name);
newinode = lookup_name(req, newparent, newname);
@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
#ifndef SYS_renameat2
fuse_reply_err(req, EINVAL);
#else
- res = syscall(SYS_renameat2, lo_fd(req, parent), name,
- lo_fd(req, newparent), newname, flags);
+ res = syscall(SYS_renameat2, parent_inode->fd, name,
+ newparent_inode->fd, newname, flags);
if (res == -1 && errno == ENOSYS) {
fuse_reply_err(req, EINVAL);
} else {
@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
goto out;
}
- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
+ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname);
fuse_reply_err(req, res == -1 ? errno : 0);
out:
unref_inode_lolocked(lo, oldinode, 1);
unref_inode_lolocked(lo, newinode, 1);
+ lo_inode_put(lo, &oldinode);
+ lo_inode_put(lo, &newinode);
+ lo_inode_put(lo, &parent_inode);
+ lo_inode_put(lo, &newparent_inode);
}
static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
fuse_reply_err(req, res == -1 ? errno : 0);
unref_inode_lolocked(lo, inode, 1);
+ lo_inode_put(lo, &inode);
}
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
g_hash_table_destroy(inode->posix_locks);
pthread_mutex_destroy(&inode->plock_mutex);
pthread_mutex_unlock(&lo->mutex);
- close(inode->fd);
- free(inode);
+
+ /* Drop our refcount from lo_do_lookup() */
+ lo_inode_put(lo, &inode);
} else {
pthread_mutex_unlock(&lo->mutex);
}
@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data)
inode->nlookup = 0;
lo_map_remove(&lo->ino_map, inode->fuse_ino);
close(inode->fd);
+ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */
return TRUE;
}
@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
(unsigned long long)nlookup);
unref_inode_lolocked(lo, inode, nlookup);
+ lo_inode_put(lo, &inode);
}
static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
err = 0;
error:
lo_dirp_put(&d);
+ lo_inode_put(lo, &dinode);
/*
* If there's an error, we can only signal it if we haven't stored
@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
{
int fd;
struct lo_data *lo = lo_data(req);
+ struct lo_inode *parent_inode;
struct fuse_entry_param e;
int err;
struct lo_cred old = {};
@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
return;
}
+ parent_inode = lo_inode(req, parent);
+ if (!parent_inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
err = lo_change_cred(req, &old);
if (err) {
goto out;
}
- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
+ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
err = fd == -1 ? errno : 0;
lo_restore_cred(&old);
@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
pthread_mutex_unlock(&lo->mutex);
if (fh == -1) {
close(fd);
- fuse_reply_err(req, ENOMEM);
- return;
+ err = ENOMEM;
+ goto out;
}
fi->fh = fh;
@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
}
out:
+ lo_inode_put(lo, &parent_inode);
+
if (err) {
fuse_reply_err(req, err);
} else {
@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
plock =
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
- pthread_mutex_unlock(&inode->plock_mutex);
- fuse_reply_err(req, ret);
- return;
+ saverr = ret;
+ goto out;
}
ret = fcntl(plock->fd, F_OFD_GETLK, lock);
if (ret == -1) {
saverr = errno;
}
+
+out:
pthread_mutex_unlock(&inode->plock_mutex);
+ lo_inode_put(lo, &inode);
if (saverr) {
fuse_reply_err(req, saverr);
@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret);
if (!plock) {
- pthread_mutex_unlock(&inode->plock_mutex);
- fuse_reply_err(req, ret);
- return;
+ saverr = ret;
+ goto out;
}
/* TODO: Is it alright to modify flock? */
@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
if (ret == -1) {
saverr = errno;
}
+
+out:
pthread_mutex_unlock(&inode->plock_mutex);
+ lo_inode_put(lo, &inode);
+
fuse_reply_err(req, saverr);
}
@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
pthread_mutex_unlock(&inode->plock_mutex);
res = close(dup(lo_fi_fd(req, fi)));
+ lo_inode_put(lo_data(req), &inode);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -2115,11 +2227,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@@ -2190,11 +2305,14 @@ out_free:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
return;
out_err:
saverr = errno;
out:
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
goto out_free;
}
@@ -2243,6 +2361,8 @@ out:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -2289,6 +2409,8 @@ out:
if (fd >= 0) {
close(fd);
}
+
+ lo_inode_put(lo, &inode);
fuse_reply_err(req, saverr);
}
@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->key.ino = stat.st_ino;
root->key.dev = stat.st_dev;
root->nlookup = 2;
+ g_atomic_int_set(&root->refcount, 2);
}
static guint lo_key_hash(gconstpointer key)
--
1.8.3.1

View File

@ -0,0 +1,76 @@
From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:08 +0100
Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-34-dgilbert@redhat.com>
Patchwork-id: 93489
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
According to vhost-user.rst "Backend program conventions", backend
programs should run in the foreground by default. Follow the
conventions so libvirt and other management tools can control virtiofsd
in a standard way.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 676032e..a3645fc 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -29,6 +29,11 @@
{ \
t, offsetof(struct fuse_cmdline_opts, p), 1 \
}
+#define FUSE_HELPER_OPT_VALUE(t, p, v) \
+ { \
+ t, offsetof(struct fuse_cmdline_opts, p), v \
+ }
+
static const struct fuse_opt fuse_helper_opts[] = {
FUSE_HELPER_OPT("-h", show_help),
@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = {
FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP),
FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("-f", foreground),
+ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0),
FUSE_HELPER_OPT("fsname=", nodefault_subtype),
FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("subtype=", nodefault_subtype),
@@ -131,6 +137,7 @@ void fuse_cmdline_help(void)
" -V --version print version\n"
" -d -o debug enable debug output (implies -f)\n"
" -f foreground operation\n"
+ " --daemonize run in background\n"
" -o max_idle_threads the maximum number of idle worker "
"threads\n"
" allowed (default: 10)\n");
@@ -158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts)
memset(opts, 0, sizeof(struct fuse_cmdline_opts));
opts->max_idle_threads = 10;
+ opts->foreground = 1;
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) ==
-1) {
--
1.8.3.1

View File

@ -0,0 +1,62 @@
From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:06 +0100
Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-92-dgilbert@redhat.com>
Patchwork-id: 93545
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Hold the lock across both lo_map_get() and lo_map_remove() to prevent
races between two FUSE_RELEASE requests. In this case I don't see a
serious bug but it's safer to do things atomically.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9414935..690edbc 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
{
struct lo_data *lo = lo_data(req);
- int fd;
+ struct lo_map_elem *elem;
+ int fd = -1;
(void)ino;
- fd = lo_fi_fd(req, fi);
-
pthread_mutex_lock(&lo->mutex);
- lo_map_remove(&lo->fd_map, fi->fh);
+ elem = lo_map_get(&lo->fd_map, fi->fh);
+ if (elem) {
+ fd = elem->fd;
+ elem = NULL;
+ lo_map_remove(&lo->fd_map, fi->fh);
+ }
pthread_mutex_unlock(&lo->mutex);
close(fd);
--
1.8.3.1

View File

@ -0,0 +1,223 @@
From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:29 +0100
Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-55-dgilbert@redhat.com>
Patchwork-id: 93510
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
virtiofsd needs access to /proc/self/fd. Let's move to a new pid
namespace so that a compromised process cannot see any other
processes running on the system.
One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child*
processes and not the current process. Therefore we need to fork the
pid 1 process that will actually run virtiofsd and leave a parent in
waitpid(2). This is not the same thing as daemonization and parent
processes should not notice a difference.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++--------------
1 file changed, 86 insertions(+), 48 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 27ab328..0947d14 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -51,7 +51,10 @@
#include <string.h>
#include <sys/file.h>
#include <sys/mount.h>
+#include <sys/prctl.h>
#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include <sys/xattr.h>
#include <unistd.h>
@@ -1945,24 +1948,95 @@ static void print_capabilities(void)
}
/*
- * Called after our UNIX domain sockets have been created, now we can move to
- * an empty network namespace to prevent TCP/IP and other network activity in
- * case this process is compromised.
+ * Move to a new mount, net, and pid namespaces to isolate this process.
*/
-static void setup_net_namespace(void)
+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
{
- if (unshare(CLONE_NEWNET) != 0) {
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n");
+ pid_t child;
+
+ /*
+ * Create a new pid namespace for *child* processes. We'll have to
+ * fork in order to enter the new pid namespace. A new mount namespace
+ * is also needed so that we can remount /proc for the new pid
+ * namespace.
+ *
+ * Our UNIX domain sockets have been created. Now we can move to
+ * an empty network namespace to prevent TCP/IP and other network
+ * activity in case this process is compromised.
+ */
+ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
+ exit(1);
+ }
+
+ child = fork();
+ if (child < 0) {
+ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
+ exit(1);
+ }
+ if (child > 0) {
+ pid_t waited;
+ int wstatus;
+
+ /* The parent waits for the child */
+ do {
+ waited = waitpid(child, &wstatus, 0);
+ } while (waited < 0 && errno == EINTR && !se->exited);
+
+ /* We were terminated by a signal, see fuse_signals.c */
+ if (se->exited) {
+ exit(0);
+ }
+
+ if (WIFEXITED(wstatus)) {
+ exit(WEXITSTATUS(wstatus));
+ }
+
+ exit(1);
+ }
+
+ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
+ prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+ /*
+ * If the mounts have shared propagation then we want to opt out so our
+ * mount changes don't affect the parent mount namespace.
+ */
+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
+ exit(1);
+ }
+
+ /* The child must remount /proc to use the new pid namespace */
+ if (mount("proc", "/proc", "proc",
+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
+ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
+ exit(1);
+ }
+
+ /* Now we can get our /proc/self/fd directory file descriptor */
+ lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+ if (lo->proc_self_fd == -1) {
+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
exit(1);
}
}
-/* This magic is based on lxc's lxc_pivot_root() */
-static void setup_pivot_root(const char *source)
+/*
+ * Make the source directory our root so symlinks cannot escape and no other
+ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
+ */
+static void setup_mounts(const char *source)
{
int oldroot;
int newroot;
+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
+ exit(1);
+ }
+
+ /* This magic is based on lxc's lxc_pivot_root() */
oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
if (oldroot < 0) {
fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source)
close(oldroot);
}
-static void setup_proc_self_fd(struct lo_data *lo)
-{
- lo->proc_self_fd = open("/proc/self/fd", O_PATH);
- if (lo->proc_self_fd == -1) {
- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
- exit(1);
- }
-}
-
-/*
- * Make the source directory our root so symlinks cannot escape and no other
- * files are accessible.
- */
-static void setup_mount_namespace(const char *source)
-{
- if (unshare(CLONE_NEWNS) != 0) {
- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n");
- exit(1);
- }
-
- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n");
- exit(1);
- }
-
- if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
- exit(1);
- }
-
- setup_pivot_root(source);
-}
-
/*
* Lock down this process to prevent access to other processes or files outside
* source directory. This reduces the impact of arbitrary code execution bugs.
*/
-static void setup_sandbox(struct lo_data *lo)
+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
{
- setup_net_namespace();
- setup_mount_namespace(lo->source);
+ setup_namespaces(lo, se);
+ setup_mounts(lo->source);
}
int main(int argc, char *argv[])
@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[])
fuse_daemonize(opts.foreground);
- /* Must be after daemonize to get the right /proc/self/fd */
- setup_proc_self_fd(&lo);
-
- setup_sandbox(&lo);
+ setup_sandbox(&lo, se);
/* Block until ctrl+c or fusermount -u */
ret = virtio_loop(se);
--
1.8.3.1

View File

@ -0,0 +1,66 @@
From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:28 +0100
Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-54-dgilbert@redhat.com>
Patchwork-id: 93508
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
If the process is compromised there should be no network access. Use an
empty network namespace to sandbox networking.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 0570453..27ab328 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1944,6 +1944,19 @@ static void print_capabilities(void)
printf("}\n");
}
+/*
+ * Called after our UNIX domain sockets have been created, now we can move to
+ * an empty network namespace to prevent TCP/IP and other network activity in
+ * case this process is compromised.
+ */
+static void setup_net_namespace(void)
+{
+ if (unshare(CLONE_NEWNET) != 0) {
+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n");
+ exit(1);
+ }
+}
+
/* This magic is based on lxc's lxc_pivot_root() */
static void setup_pivot_root(const char *source)
{
@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source)
*/
static void setup_sandbox(struct lo_data *lo)
{
+ setup_net_namespace();
setup_mount_namespace(lo->source);
}
--
1.8.3.1

View File

@ -0,0 +1,54 @@
From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:24 +0100
Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to
fuse_reply_err()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-110-dgilbert@redhat.com>
Patchwork-id: 93559
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Xiao Yang <yangx.jy@cn.fujitsu.com>
lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err()
changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches
the wrong errno (i.e. reports "fuse: bad error value: ...").
Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno
directly in lo_copy_file_range().
Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
Reviewed-by: Eryu Guan <eguan@linux.alibaba.com>
dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index fc15d61..e6f2399 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
if (res < 0) {
- fuse_reply_err(req, -errno);
+ fuse_reply_err(req, errno);
} else {
fuse_reply_write(req, res);
}
--
1.8.3.1

View File

@ -0,0 +1,48 @@
From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:16 +0100
Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for
directory open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-102-dgilbert@redhat.com>
Patchwork-id: 93555
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as
described in fuse_common.h, use cache_readdir(FOPEN_CACHE_DIR) for
directory open in cache=always mode.
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 4c61ac5..79b8b71 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
fi->fh = fh;
if (lo->cache == CACHE_ALWAYS) {
- fi->keep_cache = 1;
+ fi->cache_readdir = 1;
}
fuse_reply_open(req, fi);
return;
--
1.8.3.1

View File

@ -0,0 +1,238 @@
From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:15 +0100
Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide
lo_dirp pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-41-dgilbert@redhat.com>
Patchwork-id: 93495
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Do not expose lo_dirp pointers to clients.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++----------
1 file changed, 76 insertions(+), 27 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index a3ebf74..5f5a72f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -56,27 +56,10 @@
#include "passthrough_helpers.h"
-/*
- * We are re-using pointers to our `struct lo_inode`
- * elements as inodes. This means that we must be able to
- * store uintptr_t values in a fuse_ino_t variable. The following
- * incantation checks this condition at compile time.
- */
-#if defined(__GNUC__) && \
- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \
- !defined __cplusplus
-_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t),
- "fuse_ino_t too small to hold uintptr_t values!");
-#else
-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct {
- unsigned _uintptr_to_must_hold_fuse_ino_t
- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1);
-};
-#endif
-
struct lo_map_elem {
union {
struct lo_inode *inode;
+ struct lo_dirp *dirp;
ssize_t freelist;
};
bool in_use;
@@ -123,6 +106,7 @@ struct lo_data {
int timeout_set;
struct lo_inode root; /* protected by lo->mutex */
struct lo_map ino_map; /* protected by lo->mutex */
+ struct lo_map dirp_map; /* protected by lo->mutex */
};
static const struct fuse_opt lo_opts[] = {
@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key)
}
/* Assumes lo->mutex is held */
+static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
+{
+ struct lo_map_elem *elem;
+
+ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
+ if (!elem) {
+ return -1;
+ }
+
+ elem->dirp = dirp;
+ return elem - lo_data(req)->dirp_map.elems;
+}
+
+/* Assumes lo->mutex is held */
static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
{
struct lo_map_elem *elem;
@@ -861,9 +859,19 @@ struct lo_dirp {
off_t offset;
};
-static struct lo_dirp *lo_dirp(struct fuse_file_info *fi)
+static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
{
- return (struct lo_dirp *)(uintptr_t)fi->fh;
+ struct lo_data *lo = lo_data(req);
+ struct lo_map_elem *elem;
+
+ pthread_mutex_lock(&lo->mutex);
+ elem = lo_map_get(&lo->dirp_map, fi->fh);
+ pthread_mutex_unlock(&lo->mutex);
+ if (!elem) {
+ return NULL;
+ }
+
+ return elem->dirp;
}
static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
struct lo_data *lo = lo_data(req);
struct lo_dirp *d;
int fd;
+ ssize_t fh;
d = calloc(1, sizeof(struct lo_dirp));
if (d == NULL) {
@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
d->offset = 0;
d->entry = NULL;
- fi->fh = (uintptr_t)d;
+ pthread_mutex_lock(&lo->mutex);
+ fh = lo_add_dirp_mapping(req, d);
+ pthread_mutex_unlock(&lo->mutex);
+ if (fh == -1) {
+ goto out_err;
+ }
+
+ fi->fh = fh;
if (lo->cache == CACHE_ALWAYS) {
fi->keep_cache = 1;
}
@@ -903,6 +919,9 @@ out_errno:
error = errno;
out_err:
if (d) {
+ if (d->dp) {
+ closedir(d->dp);
+ }
if (fd != -1) {
close(fd);
}
@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name)
static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
off_t offset, struct fuse_file_info *fi, int plus)
{
- struct lo_dirp *d = lo_dirp(fi);
- char *buf;
+ struct lo_dirp *d;
+ char *buf = NULL;
char *p;
size_t rem = size;
- int err;
+ int err = ENOMEM;
(void)ino;
+ d = lo_dirp(req, fi);
+ if (!d) {
+ goto error;
+ }
+
buf = calloc(1, size);
if (!buf) {
- err = ENOMEM;
goto error;
}
p = buf;
@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
{
- struct lo_dirp *d = lo_dirp(fi);
+ struct lo_data *lo = lo_data(req);
+ struct lo_dirp *d;
+
(void)ino;
+
+ d = lo_dirp(req, fi);
+ if (!d) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ pthread_mutex_lock(&lo->mutex);
+ lo_map_remove(&lo->dirp_map, fi->fh);
+ pthread_mutex_unlock(&lo->mutex);
+
closedir(d->dp);
free(d);
fuse_reply_err(req, 0);
@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
struct fuse_file_info *fi)
{
int res;
- int fd = dirfd(lo_dirp(fi)->dp);
+ struct lo_dirp *d;
+ int fd;
+
(void)ino;
+
+ d = lo_dirp(req, fi);
+ if (!d) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ fd = dirfd(d->dp);
if (datasync) {
res = fdatasync(fd);
} else {
@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[])
root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
root_elem->inode = &lo.root;
+ lo_map_init(&lo.dirp_map);
+
if (fuse_parse_cmdline(&args, &opts) != 0) {
return 1;
}
@@ -1710,6 +1758,7 @@ err_out2:
err_out1:
fuse_opt_free_args(&args);
+ lo_map_destroy(&lo.dirp_map);
lo_map_destroy(&lo.ino_map);
if (lo.root.fd >= 0) {
--
1.8.3.1

View File

@ -0,0 +1,303 @@
From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:17 +0100
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
Patchwork-id: 93496
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.
Add racy fallback for these if the race-free method doesn't work. We do
our best to avoid races even in this case:
- get absolute path by reading /proc/self/fd/NN symlink
- lookup parent directory: after this we are safe against renames in
ancestors
- lookup name in parent directory, and verify that we got to the original
inode, if not retry the whole thing
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible, only from
other entities changing the filesystem.
If the "norace" option is given, then disable the racy fallbacks.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 5 +-
tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
2 files changed, 145 insertions(+), 17 deletions(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index b8ec5ac..5531425 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
" --daemonize run in background\n"
" -o max_idle_threads the maximum number of idle worker "
"threads\n"
- " allowed (default: 10)\n");
+ " allowed (default: 10)\n"
+ " -o norace disable racy fallback\n"
+ " default: false\n"
+ );
}
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9815bfa..ac380ef 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -98,6 +98,7 @@ enum {
struct lo_data {
pthread_mutex_t mutex;
int debug;
+ int norace;
int writeback;
int flock;
int xattr;
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
{ "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
-
+ { "norace", offsetof(struct lo_data, norace), 1 },
FUSE_OPT_END
};
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+
static struct lo_data *lo_data(fuse_req_t req)
{
return (struct lo_data *)fuse_req_userdata(req);
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout);
}
-static int utimensat_empty_nofollow(struct lo_inode *inode,
- const struct timespec *tv)
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+ char path[PATH_MAX], struct lo_inode **parent)
{
- int res;
char procname[64];
+ char *last;
+ struct stat stat;
+ struct lo_inode *p;
+ int retries = 2;
+ int res;
+
+retry:
+ sprintf(procname, "/proc/self/fd/%i", inode->fd);
+
+ res = readlink(procname, path, PATH_MAX);
+ if (res < 0) {
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
+ goto fail_noretry;
+ }
+
+ if (res >= PATH_MAX) {
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
+ goto fail_noretry;
+ }
+ path[res] = '\0';
+
+ last = strrchr(path, '/');
+ if (last == NULL) {
+ /* Shouldn't happen */
+ fuse_log(
+ FUSE_LOG_WARNING,
+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
+ goto fail_noretry;
+ }
+ if (last == path) {
+ p = &lo->root;
+ pthread_mutex_lock(&lo->mutex);
+ p->refcount++;
+ pthread_mutex_unlock(&lo->mutex);
+ } else {
+ *last = '\0';
+ res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+ if (res == -1) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to stat parent: %m\n", __func__);
+ }
+ goto fail;
+ }
+ p = lo_find(lo, &stat);
+ if (p == NULL) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to find parent\n", __func__);
+ }
+ goto fail;
+ }
+ }
+ last++;
+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+ if (res == -1) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to stat last\n", __func__);
+ }
+ goto fail_unref;
+ }
+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to match last\n", __func__);
+ }
+ goto fail_unref;
+ }
+ *parent = p;
+ memmove(path, last, strlen(last) + 1);
+
+ return 0;
+
+fail_unref:
+ unref_inode(lo, p, 1);
+fail:
+ if (retries) {
+ retries--;
+ goto retry;
+ }
+fail_noretry:
+ errno = EIO;
+ return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+ const struct timespec *tv)
+{
+ int res;
+ struct lo_inode *parent;
+ char path[PATH_MAX];
if (inode->is_symlink) {
- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
if (res == -1 && errno == EINVAL) {
/* Sorry, no race free way to set times on symlink. */
- errno = EPERM;
+ if (lo->norace) {
+ errno = EPERM;
+ } else {
+ goto fallback;
+ }
}
return res;
}
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
- return utimensat(AT_FDCWD, procname, tv, 0);
+ return utimensat(AT_FDCWD, path, tv, 0);
+
+fallback:
+ res = lo_parent_and_name(lo, inode, path, &parent);
+ if (res != -1) {
+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
+ unref_inode(lo, parent, 1);
+ }
+
+ return res;
}
static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
{
int saverr;
char procname[64];
+ struct lo_data *lo = lo_data(req);
struct lo_inode *inode;
int ifd;
int res;
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
if (fi) {
res = futimens(fd, tv);
} else {
- res = utimensat_empty_nofollow(inode, tv);
+ res = utimensat_empty(lo, inode, tv);
}
if (res == -1) {
goto out_err;
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
}
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
- const char *name)
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+ int dfd, const char *name)
{
int res;
- char procname[64];
+ struct lo_inode *parent;
+ char path[PATH_MAX];
if (inode->is_symlink) {
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
/* Sorry, no race free way to hard-link a symlink. */
- errno = EPERM;
+ if (lo->norace) {
+ errno = EPERM;
+ } else {
+ goto fallback;
+ }
}
return res;
}
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
+
+fallback:
+ res = lo_parent_and_name(lo, inode, path, &parent);
+ if (res != -1) {
+ res = linkat(parent->fd, path, dfd, name, 0);
+ unref_inode(lo, parent, 1);
+ }
- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
+ return res;
}
static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
if (res == -1) {
goto out_err;
}
--
1.8.3.1

View File

@ -0,0 +1,328 @@
From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:16 +0100
Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file
descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-42-dgilbert@redhat.com>
Patchwork-id: 93494
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Do not expose file descriptor numbers to clients. This prevents the
abuse of internal file descriptors (like stdin/stdout).
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Fix from:
Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
dgilbert:
Added lseek
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++--------
1 file changed, 94 insertions(+), 22 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 5f5a72f..9815bfa 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -60,6 +60,7 @@ struct lo_map_elem {
union {
struct lo_inode *inode;
struct lo_dirp *dirp;
+ int fd;
ssize_t freelist;
};
bool in_use;
@@ -107,6 +108,7 @@ struct lo_data {
struct lo_inode root; /* protected by lo->mutex */
struct lo_map ino_map; /* protected by lo->mutex */
struct lo_map dirp_map; /* protected by lo->mutex */
+ struct lo_map fd_map; /* protected by lo->mutex */
};
static const struct fuse_opt lo_opts[] = {
@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key)
}
/* Assumes lo->mutex is held */
+static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
+{
+ struct lo_map_elem *elem;
+
+ elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
+ if (!elem) {
+ return -1;
+ }
+
+ elem->fd = fd;
+ return elem - lo_data(req)->fd_map.elems;
+}
+
+/* Assumes lo->mutex is held */
static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
{
struct lo_map_elem *elem;
@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode,
return utimensat(AT_FDCWD, procname, tv, 0);
}
+static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
+{
+ struct lo_data *lo = lo_data(req);
+ struct lo_map_elem *elem;
+
+ pthread_mutex_lock(&lo->mutex);
+ elem = lo_map_get(&lo->fd_map, fi->fh);
+ pthread_mutex_unlock(&lo->mutex);
+
+ if (!elem) {
+ return -1;
+ }
+
+ return elem->fd;
+}
+
static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
int valid, struct fuse_file_info *fi)
{
@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
struct lo_inode *inode;
int ifd;
int res;
+ int fd;
inode = lo_inode(req, ino);
if (!inode) {
@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
ifd = inode->fd;
+ /* If fi->fh is invalid we'll report EBADF later */
+ if (fi) {
+ fd = lo_fi_fd(req, fi);
+ }
+
if (valid & FUSE_SET_ATTR_MODE) {
if (fi) {
- res = fchmod(fi->fh, attr->st_mode);
+ res = fchmod(fd, attr->st_mode);
} else {
sprintf(procname, "/proc/self/fd/%i", ifd);
res = chmod(procname, attr->st_mode);
@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
}
if (valid & FUSE_SET_ATTR_SIZE) {
if (fi) {
- res = ftruncate(fi->fh, attr->st_size);
+ res = ftruncate(fd, attr->st_size);
} else {
sprintf(procname, "/proc/self/fd/%i", ifd);
res = truncate(procname, attr->st_size);
@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
}
if (fi) {
- res = futimens(fi->fh, tv);
+ res = futimens(fd, tv);
} else {
res = utimensat_empty_nofollow(inode, tv);
}
@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
lo_restore_cred(&old);
if (!err) {
- fi->fh = fd;
+ ssize_t fh;
+
+ pthread_mutex_lock(&lo->mutex);
+ fh = lo_add_fd_mapping(req, fd);
+ pthread_mutex_unlock(&lo->mutex);
+ if (fh == -1) {
+ close(fd);
+ fuse_reply_err(req, ENOMEM);
+ return;
+ }
+
+ fi->fh = fh;
err = lo_do_lookup(req, parent, name, &e);
}
if (lo->cache == CACHE_NEVER) {
@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
int fd;
+ ssize_t fh;
char buf[64];
struct lo_data *lo = lo_data(req);
@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
return (void)fuse_reply_err(req, errno);
}
- fi->fh = fd;
+ pthread_mutex_lock(&lo->mutex);
+ fh = lo_add_fd_mapping(req, fd);
+ pthread_mutex_unlock(&lo->mutex);
+ if (fh == -1) {
+ close(fd);
+ fuse_reply_err(req, ENOMEM);
+ return;
+ }
+
+ fi->fh = fh;
if (lo->cache == CACHE_NEVER) {
fi->direct_io = 1;
} else if (lo->cache == CACHE_ALWAYS) {
@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
static void lo_release(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
{
+ struct lo_data *lo = lo_data(req);
+ int fd;
+
(void)ino;
- close(fi->fh);
+ fd = lo_fi_fd(req, fi);
+
+ pthread_mutex_lock(&lo->mutex);
+ lo_map_remove(&lo->fd_map, fi->fh);
+ pthread_mutex_unlock(&lo->mutex);
+
+ close(fd);
fuse_reply_err(req, 0);
}
@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
{
int res;
(void)ino;
- res = close(dup(fi->fh));
+ res = close(dup(lo_fi_fd(req, fi)));
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
return (void)fuse_reply_err(req, errno);
}
} else {
- fd = fi->fh;
+ fd = lo_fi_fd(req, fi);
}
if (datasync) {
@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
}
buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
- buf.buf[0].fd = fi->fh;
+ buf.buf[0].fd = lo_fi_fd(req, fi);
buf.buf[0].pos = offset;
fuse_reply_data(req, &buf);
@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
- out_buf.buf[0].fd = fi->fh;
+ out_buf.buf[0].fd = lo_fi_fd(req, fi);
out_buf.buf[0].pos = off;
if (lo_debug(req)) {
@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
(void)ino;
#ifdef CONFIG_FALLOCATE
- err = fallocate(fi->fh, mode, offset, length);
+ err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
if (err < 0) {
err = errno;
}
@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
return;
}
- err = posix_fallocate(fi->fh, offset, length);
+ err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
#endif
fuse_reply_err(req, err);
@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
int res;
(void)ino;
- res = flock(fi->fh, op);
+ res = flock(lo_fi_fd(req, fi), op);
fuse_reply_err(req, res == -1 ? errno : 0);
}
@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
off_t off_out, struct fuse_file_info *fi_out,
size_t len, int flags)
{
+ int in_fd, out_fd;
ssize_t res;
- if (lo_debug(req))
- fuse_log(FUSE_LOG_DEBUG,
- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, "
- "off=%lu, ino=%" PRIu64 "/fd=%lu, "
- "off=%lu, size=%zd, flags=0x%x)\n",
- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len,
- flags);
+ in_fd = lo_fi_fd(req, fi_in);
+ out_fd = lo_fi_fd(req, fi_out);
+
+ fuse_log(FUSE_LOG_DEBUG,
+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
+ "off=%lu, ino=%" PRIu64 "/fd=%d, "
+ "off=%lu, size=%zd, flags=0x%x)\n",
+ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags);
- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags);
+ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
if (res < 0) {
fuse_reply_err(req, -errno);
} else {
@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
off_t res;
(void)ino;
- res = lseek(fi->fh, off, whence);
+ res = lseek(lo_fi_fd(req, fi), off, whence);
if (res != -1) {
fuse_reply_lseek(req, res);
} else {
@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[])
root_elem->inode = &lo.root;
lo_map_init(&lo.dirp_map);
+ lo_map_init(&lo.fd_map);
if (fuse_parse_cmdline(&args, &opts) != 0) {
return 1;
@@ -1758,6 +1829,7 @@ err_out2:
err_out1:
fuse_opt_free_args(&args);
+ lo_map_destroy(&lo.fd_map);
lo_map_destroy(&lo.dirp_map);
lo_map_destroy(&lo.ino_map);
--
1.8.3.1

View File

@ -0,0 +1,395 @@
From d81396cc3d9815730903b0755c9d2e67d6954d54 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:14 +0100
Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide
lo_inode pointers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-40-dgilbert@redhat.com>
Patchwork-id: 93493
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Do not expose lo_inode pointers to clients.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++--------
1 file changed, 114 insertions(+), 30 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e83a976..a3ebf74 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -57,8 +57,8 @@
#include "passthrough_helpers.h"
/*
- * We are re-using pointers to our `struct lo_inode` and `struct
- * lo_dirp` elements as inodes. This means that we must be able to
+ * We are re-using pointers to our `struct lo_inode`
+ * elements as inodes. This means that we must be able to
* store uintptr_t values in a fuse_ino_t variable. The following
* incantation checks this condition at compile time.
*/
@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct {
struct lo_map_elem {
union {
- /* Element values will go here... */
+ struct lo_inode *inode;
ssize_t freelist;
};
bool in_use;
@@ -97,6 +97,7 @@ struct lo_inode {
ino_t ino;
dev_t dev;
uint64_t refcount; /* protected by lo->mutex */
+ fuse_ino_t fuse_ino;
};
struct lo_cred {
@@ -121,6 +122,7 @@ struct lo_data {
int cache;
int timeout_set;
struct lo_inode root; /* protected by lo->mutex */
+ struct lo_map ino_map; /* protected by lo->mutex */
};
static const struct fuse_opt lo_opts[] = {
@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req)
return (struct lo_data *)fuse_req_userdata(req);
}
-__attribute__((unused)) static void lo_map_init(struct lo_map *map)
+static void lo_map_init(struct lo_map *map)
{
map->elems = NULL;
map->nelems = 0;
map->freelist = -1;
}
-__attribute__((unused)) static void lo_map_destroy(struct lo_map *map)
+static void lo_map_destroy(struct lo_map *map)
{
free(map->elems);
}
@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems)
return 1;
}
-__attribute__((unused)) static struct lo_map_elem *
-lo_map_alloc_elem(struct lo_map *map)
+static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
{
struct lo_map_elem *elem;
@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map)
return elem;
}
-__attribute__((unused)) static struct lo_map_elem *
-lo_map_reserve(struct lo_map *map, size_t key)
+static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
{
ssize_t *prev;
@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key)
return NULL;
}
-__attribute__((unused)) static struct lo_map_elem *
-lo_map_get(struct lo_map *map, size_t key)
+static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
{
if (key >= map->nelems) {
return NULL;
@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key)
return &map->elems[key];
}
-__attribute__((unused)) static void lo_map_remove(struct lo_map *map,
- size_t key)
+static void lo_map_remove(struct lo_map *map, size_t key)
{
struct lo_map_elem *elem;
@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map,
map->freelist = key;
}
+/* Assumes lo->mutex is held */
+static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
+{
+ struct lo_map_elem *elem;
+
+ elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
+ if (!elem) {
+ return -1;
+ }
+
+ elem->inode = inode;
+ return elem - lo_data(req)->ino_map.elems;
+}
+
static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
{
- if (ino == FUSE_ROOT_ID) {
- return &lo_data(req)->root;
- } else {
- return (struct lo_inode *)(uintptr_t)ino;
+ struct lo_data *lo = lo_data(req);
+ struct lo_map_elem *elem;
+
+ pthread_mutex_lock(&lo->mutex);
+ elem = lo_map_get(&lo->ino_map, ino);
+ pthread_mutex_unlock(&lo->mutex);
+
+ if (!elem) {
+ return NULL;
}
+
+ return elem->inode;
}
static int lo_fd(fuse_req_t req, fuse_ino_t ino)
{
- return lo_inode(req, ino)->fd;
+ struct lo_inode *inode = lo_inode(req, ino);
+ return inode ? inode->fd : -1;
}
static bool lo_debug(fuse_req_t req)
@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
{
int saverr;
char procname[64];
- struct lo_inode *inode = lo_inode(req, ino);
- int ifd = inode->fd;
+ struct lo_inode *inode;
+ int ifd;
int res;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ ifd = inode->fd;
+
if (valid & FUSE_SET_ATTR_MODE) {
if (fi) {
res = fchmod(fi->fh, attr->st_mode);
@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
inode->dev = e->attr.st_dev;
pthread_mutex_lock(&lo->mutex);
+ inode->fuse_ino = lo_add_inode_mapping(req, inode);
prev = &lo->root;
next = prev->next;
next->prev = inode;
@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
prev->next = inode;
pthread_mutex_unlock(&lo->mutex);
}
- e->ino = (uintptr_t)inode;
+ e->ino = inode->fuse_ino;
if (lo_debug(req)) {
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
{
int res;
int saverr;
- struct lo_inode *dir = lo_inode(req, parent);
+ struct lo_inode *dir;
struct fuse_entry_param e;
struct lo_cred old = {};
+ dir = lo_inode(req, parent);
+ if (!dir) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
saverr = ENOMEM;
saverr = lo_change_cred(req, &old);
@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
{
int res;
struct lo_data *lo = lo_data(req);
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
struct fuse_entry_param e;
int saverr;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
memset(&e, 0, sizeof(struct fuse_entry_param));
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
@@ -684,7 +725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
pthread_mutex_lock(&lo->mutex);
inode->refcount++;
pthread_mutex_unlock(&lo->mutex);
- e.ino = (uintptr_t)inode;
+ e.ino = inode->fuse_ino;
if (lo_debug(req)) {
fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
next->prev = prev;
prev->next = next;
+ lo_map_remove(&lo->ino_map, inode->fuse_ino);
pthread_mutex_unlock(&lo->mutex);
close(inode->fd);
free(inode);
-
} else {
pthread_mutex_unlock(&lo->mutex);
}
@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
struct lo_data *lo = lo_data(req);
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
+
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ return;
+ }
if (lo_debug(req)) {
fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
{
char *value = NULL;
char procname[64];
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
ssize_t ret;
int saverr;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
saverr = ENOSYS;
if (!lo_data(req)->xattr) {
goto out;
@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
{
char *value = NULL;
char procname[64];
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
ssize_t ret;
int saverr;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
saverr = ENOSYS;
if (!lo_data(req)->xattr) {
goto out;
@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
const char *value, size_t size, int flags)
{
char procname[64];
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
ssize_t ret;
int saverr;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
saverr = ENOSYS;
if (!lo_data(req)->xattr) {
goto out;
@@ -1400,10 +1464,16 @@ out:
static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
{
char procname[64];
- struct lo_inode *inode = lo_inode(req, ino);
+ struct lo_inode *inode;
ssize_t ret;
int saverr;
+ inode = lo_inode(req, ino);
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
saverr = ENOSYS;
if (!lo_data(req)->xattr) {
goto out;
@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[])
struct fuse_session *se;
struct fuse_cmdline_opts opts;
struct lo_data lo = { .debug = 0, .writeback = 0 };
+ struct lo_map_elem *root_elem;
int ret = -1;
/* Don't mask creation mode, kernel already did that */
@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[])
pthread_mutex_init(&lo.mutex, NULL);
lo.root.next = lo.root.prev = &lo.root;
lo.root.fd = -1;
+ lo.root.fuse_ino = FUSE_ROOT_ID;
lo.cache = CACHE_NORMAL;
+ /*
+ * Set up the ino map like this:
+ * [0] Reserved (will not be used)
+ * [1] Root inode
+ */
+ lo_map_init(&lo.ino_map);
+ lo_map_reserve(&lo.ino_map, 0)->in_use = false;
+ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
+ root_elem->inode = &lo.root;
+
if (fuse_parse_cmdline(&args, &opts) != 0) {
return 1;
}
@@ -1628,6 +1710,8 @@ err_out2:
err_out1:
fuse_opt_free_args(&args);
+ lo_map_destroy(&lo.ino_map);
+
if (lo.root.fd >= 0) {
close(lo.root.fd);
}
--
1.8.3.1

View File

@ -0,0 +1,182 @@
From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:13 +0100
Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh
indirection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-39-dgilbert@redhat.com>
Patchwork-id: 93492
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
A layer of indirection is needed because passthrough_ll cannot expose
pointers or file descriptor numbers to untrusted clients. Malicious
clients could send invalid pointers or file descriptors in order to
crash or exploit the file system daemon.
lo_map provides an integer key->value mapping. This will be used for
ino and fh fields in the patches that follow.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++
1 file changed, 124 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 5e06179..e83a976 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct {
};
#endif
+struct lo_map_elem {
+ union {
+ /* Element values will go here... */
+ ssize_t freelist;
+ };
+ bool in_use;
+};
+
+/* Maps FUSE fh or ino values to internal objects */
+struct lo_map {
+ struct lo_map_elem *elems;
+ size_t nelems;
+ ssize_t freelist;
+};
+
struct lo_inode {
struct lo_inode *next; /* protected by lo->mutex */
struct lo_inode *prev; /* protected by lo->mutex */
@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req)
return (struct lo_data *)fuse_req_userdata(req);
}
+__attribute__((unused)) static void lo_map_init(struct lo_map *map)
+{
+ map->elems = NULL;
+ map->nelems = 0;
+ map->freelist = -1;
+}
+
+__attribute__((unused)) static void lo_map_destroy(struct lo_map *map)
+{
+ free(map->elems);
+}
+
+static int lo_map_grow(struct lo_map *map, size_t new_nelems)
+{
+ struct lo_map_elem *new_elems;
+ size_t i;
+
+ if (new_nelems <= map->nelems) {
+ return 1;
+ }
+
+ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems);
+ if (!new_elems) {
+ return 0;
+ }
+
+ for (i = map->nelems; i < new_nelems; i++) {
+ new_elems[i].freelist = i + 1;
+ new_elems[i].in_use = false;
+ }
+ new_elems[new_nelems - 1].freelist = -1;
+
+ map->elems = new_elems;
+ map->freelist = map->nelems;
+ map->nelems = new_nelems;
+ return 1;
+}
+
+__attribute__((unused)) static struct lo_map_elem *
+lo_map_alloc_elem(struct lo_map *map)
+{
+ struct lo_map_elem *elem;
+
+ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
+ return NULL;
+ }
+
+ elem = &map->elems[map->freelist];
+ map->freelist = elem->freelist;
+
+ elem->in_use = true;
+
+ return elem;
+}
+
+__attribute__((unused)) static struct lo_map_elem *
+lo_map_reserve(struct lo_map *map, size_t key)
+{
+ ssize_t *prev;
+
+ if (!lo_map_grow(map, key + 1)) {
+ return NULL;
+ }
+
+ for (prev = &map->freelist; *prev != -1;
+ prev = &map->elems[*prev].freelist) {
+ if (*prev == key) {
+ struct lo_map_elem *elem = &map->elems[key];
+
+ *prev = elem->freelist;
+ elem->in_use = true;
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+__attribute__((unused)) static struct lo_map_elem *
+lo_map_get(struct lo_map *map, size_t key)
+{
+ if (key >= map->nelems) {
+ return NULL;
+ }
+ if (!map->elems[key].in_use) {
+ return NULL;
+ }
+ return &map->elems[key];
+}
+
+__attribute__((unused)) static void lo_map_remove(struct lo_map *map,
+ size_t key)
+{
+ struct lo_map_elem *elem;
+
+ if (key >= map->nelems) {
+ return;
+ }
+
+ elem = &map->elems[key];
+ if (!elem->in_use) {
+ return;
+ }
+
+ elem->in_use = false;
+
+ elem->freelist = map->freelist;
+ map->freelist = key;
+}
+
static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
{
if (ino == FUSE_ROOT_ID) {
--
1.8.3.1

View File

@ -0,0 +1,52 @@
From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:45 +0100
Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-71-dgilbert@redhat.com>
Patchwork-id: 93531
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 98114a3..18d69ab 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
}
if (flags) {
+#ifndef SYS_renameat2
fuse_reply_err(req, EINVAL);
+#else
+ res = syscall(SYS_renameat2, lo_fd(req, parent), name,
+ lo_fd(req, newparent), newname, flags);
+ if (res == -1 && errno == ENOSYS) {
+ fuse_reply_err(req, EINVAL);
+ } else {
+ fuse_reply_err(req, res == -1 ? errno : 0);
+ }
+#endif
return;
}
--
1.8.3.1

View File

@ -0,0 +1,138 @@
From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:51 +0100
Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related
options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-77-dgilbert@redhat.com>
Patchwork-id: 93530
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
- Rename "cache=never" to "cache=none" to match 9p's similar option.
- Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto"
option.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 5 ++++-
tools/virtiofsd/passthrough_ll.c | 20 ++++++++++----------
2 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 14f5d70..5672024 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -145,6 +145,9 @@ void fuse_cmdline_help(void)
" --syslog log to syslog (default stderr)\n"
" -f foreground operation\n"
" --daemonize run in background\n"
+ " -o cache=<mode> cache mode. could be one of \"auto, "
+ "always, none\"\n"
+ " default: auto\n"
" -o log_level=<level> log level, default to \"info\"\n"
" level could be one of \"debug, "
"info, warn, err\"\n"
@@ -156,7 +159,7 @@ void fuse_cmdline_help(void)
" -o readdirplus|no_readdirplus\n"
" enable/disable readirplus\n"
" default: readdirplus except with "
- "cache=never\n"
+ "cache=none\n"
);
}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9e7191e..b40f287 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -101,8 +101,8 @@ struct lo_cred {
};
enum {
- CACHE_NEVER,
- CACHE_NORMAL,
+ CACHE_NONE,
+ CACHE_AUTO,
CACHE_ALWAYS,
};
@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = {
{ "no_xattr", offsetof(struct lo_data, xattr), 0 },
{ "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
{ "timeout=", offsetof(struct lo_data, timeout_set), 1 },
- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
+ { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
{ "norace", offsetof(struct lo_data, norace), 1 },
{ "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
conn->want |= FUSE_CAP_FLOCK_LOCKS;
}
- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
+ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
lo->readdirplus_clear) {
fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
conn->want &= ~FUSE_CAP_READDIRPLUS;
@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
fi->fh = fh;
err = lo_do_lookup(req, parent, name, &e);
}
- if (lo->cache == CACHE_NEVER) {
+ if (lo->cache == CACHE_NONE) {
fi->direct_io = 1;
} else if (lo->cache == CACHE_ALWAYS) {
fi->keep_cache = 1;
@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
}
fi->fh = fh;
- if (lo->cache == CACHE_NEVER) {
+ if (lo->cache == CACHE_NONE) {
fi->direct_io = 1;
} else if (lo->cache == CACHE_ALWAYS) {
fi->keep_cache = 1;
@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[])
lo.root.next = lo.root.prev = &lo.root;
lo.root.fd = -1;
lo.root.fuse_ino = FUSE_ROOT_ID;
- lo.cache = CACHE_NORMAL;
+ lo.cache = CACHE_AUTO;
/*
* Set up the ino map like this:
@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[])
}
if (!lo.timeout_set) {
switch (lo.cache) {
- case CACHE_NEVER:
+ case CACHE_NONE:
lo.timeout = 0.0;
break;
- case CACHE_NORMAL:
+ case CACHE_AUTO:
lo.timeout = 1.0;
break;
--
1.8.3.1

View File

@ -0,0 +1,79 @@
From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:47 +0100
Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-73-dgilbert@redhat.com>
Patchwork-id: 93524
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 4 ++++
tools/virtiofsd/passthrough_ll.c | 7 ++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 6d50a46..14f5d70 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -153,6 +153,10 @@ void fuse_cmdline_help(void)
" allowed (default: 10)\n"
" -o norace disable racy fallback\n"
" default: false\n"
+ " -o readdirplus|no_readdirplus\n"
+ " enable/disable readirplus\n"
+ " default: readdirplus except with "
+ "cache=never\n"
);
}
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 6480c51..8b1784f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -117,6 +117,8 @@ struct lo_data {
double timeout;
int cache;
int timeout_set;
+ int readdirplus_set;
+ int readdirplus_clear;
struct lo_inode root; /* protected by lo->mutex */
struct lo_map ino_map; /* protected by lo->mutex */
struct lo_map dirp_map; /* protected by lo->mutex */
@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = {
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
{ "norace", offsetof(struct lo_data, norace), 1 },
+ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
+ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
FUSE_OPT_END
};
static bool use_syslog = false;
@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
conn->want |= FUSE_CAP_FLOCK_LOCKS;
}
- if (lo->cache == CACHE_NEVER) {
+ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
+ lo->readdirplus_clear) {
fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
conn->want &= ~FUSE_CAP_READDIRPLUS;
}
--
1.8.3.1

View File

@ -0,0 +1,198 @@
From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:12 +0100
Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in
caller's context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-38-dgilbert@redhat.com>
Patchwork-id: 93488
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Vivek Goyal <vgoyal@redhat.com>
We need to create files in the caller's context. Otherwise after
creating a file, the caller might not be able to do file operations on
that file.
Change the effective uid/gid to the caller's uid/gid, create the file, and
then switch back to uid/gid 0.
Use syscall(setresuid, ...) rather than the glibc wrapper: glibc applies the
credential change to all threads in the process, which is not what we want.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++---
1 file changed, 91 insertions(+), 5 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index cd27c09..5e06179 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -50,6 +50,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
+#include <sys/syscall.h>
#include <sys/xattr.h>
#include <unistd.h>
@@ -83,6 +84,11 @@ struct lo_inode {
uint64_t refcount; /* protected by lo->mutex */
};
+struct lo_cred {
+ uid_t euid;
+ gid_t egid;
+};
+
enum {
CACHE_NEVER,
CACHE_NORMAL,
@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
}
}
+/*
+ * On some archs, setres*id is limited to 2^16 but they
+ * provide setres*id32 variants that allow 2^32.
+ * Others just let setres*id do 2^32 anyway.
+ */
+#ifdef SYS_setresgid32
+#define OURSYS_setresgid SYS_setresgid32
+#else
+#define OURSYS_setresgid SYS_setresgid
+#endif
+
+#ifdef SYS_setresuid32
+#define OURSYS_setresuid SYS_setresuid32
+#else
+#define OURSYS_setresuid SYS_setresuid
+#endif
+
+/*
+ * Change to uid/gid of caller so that file is created with
+ * ownership of caller.
+ * TODO: What about selinux context?
+ */
+static int lo_change_cred(fuse_req_t req, struct lo_cred *old)
+{
+ int res;
+
+ old->euid = geteuid();
+ old->egid = getegid();
+
+ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
+ if (res == -1) {
+ return errno;
+ }
+
+ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
+ if (res == -1) {
+ int errno_save = errno;
+
+ syscall(OURSYS_setresgid, -1, old->egid, -1);
+ return errno_save;
+ }
+
+ return 0;
+}
+
+/* Regain Privileges */
+static void lo_restore_cred(struct lo_cred *old)
+{
+ int res;
+
+ res = syscall(OURSYS_setresuid, -1, old->euid, -1);
+ if (res == -1) {
+ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
+ exit(1);
+ }
+
+ res = syscall(OURSYS_setresgid, -1, old->egid, -1);
+ if (res == -1) {
+ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
+ exit(1);
+ }
+}
+
static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
const char *name, mode_t mode, dev_t rdev,
const char *link)
@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
int saverr;
struct lo_inode *dir = lo_inode(req, parent);
struct fuse_entry_param e;
+ struct lo_cred old = {};
saverr = ENOMEM;
+ saverr = lo_change_cred(req, &old);
+ if (saverr) {
+ goto out;
+ }
+
res = mknod_wrapper(dir->fd, name, link, mode, rdev);
saverr = errno;
+
+ lo_restore_cred(&old);
+
if (res == -1) {
goto out;
}
@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
struct lo_data *lo = lo_data(req);
struct fuse_entry_param e;
int err;
+ struct lo_cred old = {};
if (lo_debug(req)) {
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n",
parent, name);
}
+ err = lo_change_cred(req, &old);
+ if (err) {
+ goto out;
+ }
+
fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
mode);
- if (fd == -1) {
- return (void)fuse_reply_err(req, errno);
- }
+ err = fd == -1 ? errno : 0;
+ lo_restore_cred(&old);
- fi->fh = fd;
+ if (!err) {
+ fi->fh = fd;
+ err = lo_do_lookup(req, parent, name, &e);
+ }
if (lo->cache == CACHE_NEVER) {
fi->direct_io = 1;
} else if (lo->cache == CACHE_ALWAYS) {
fi->keep_cache = 1;
}
- err = lo_do_lookup(req, parent, name, &e);
+out:
if (err) {
fuse_reply_err(req, err);
} else {
--
1.8.3.1

View File

@ -0,0 +1,50 @@
From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:46 +0100
Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on
cache=never
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-72-dgilbert@redhat.com>
Patchwork-id: 93525
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
...because the attributes sent in the READDIRPLUS reply would be discarded
anyway.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 18d69ab..6480c51 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
conn->want |= FUSE_CAP_FLOCK_LOCKS;
}
+ if (lo->cache == CACHE_NEVER) {
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
+ conn->want &= ~FUSE_CAP_READDIRPLUS;
+ }
}
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
--
1.8.3.1

View File

@ -0,0 +1,143 @@
From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:10 +0100
Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on
remove/rename
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-96-dgilbert@redhat.com>
Patchwork-id: 93549
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 49 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index c819b5f..e3a6d6b 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1140,17 +1140,42 @@ out_err:
fuse_reply_err(req, saverr);
}
+static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
+ const char *name)
+{
+ int res;
+ struct stat attr;
+
+ res = fstatat(lo_fd(req, parent), name, &attr,
+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+ if (res == -1) {
+ return NULL;
+ }
+
+ return lo_find(lo_data(req), &attr);
+}
+
static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
{
int res;
+ struct lo_inode *inode;
+ struct lo_data *lo = lo_data(req);
+
if (!is_safe_path_component(name)) {
fuse_reply_err(req, EINVAL);
return;
}
+ inode = lookup_name(req, parent, name);
+ if (!inode) {
+ fuse_reply_err(req, EIO);
+ return;
+ }
+
res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
fuse_reply_err(req, res == -1 ? errno : 0);
+ unref_inode_lolocked(lo, inode, 1);
}
static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
unsigned int flags)
{
int res;
+ struct lo_inode *oldinode;
+ struct lo_inode *newinode;
+ struct lo_data *lo = lo_data(req);
if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
fuse_reply_err(req, EINVAL);
return;
}
+ oldinode = lookup_name(req, parent, name);
+ newinode = lookup_name(req, newparent, newname);
+
+ if (!oldinode) {
+ fuse_reply_err(req, EIO);
+ goto out;
+ }
+
if (flags) {
#ifndef SYS_renameat2
fuse_reply_err(req, EINVAL);
@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
fuse_reply_err(req, res == -1 ? errno : 0);
}
#endif
- return;
+ goto out;
}
res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
fuse_reply_err(req, res == -1 ? errno : 0);
+out:
+ unref_inode_lolocked(lo, oldinode, 1);
+ unref_inode_lolocked(lo, newinode, 1);
}
static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
{
int res;
+ struct lo_inode *inode;
+ struct lo_data *lo = lo_data(req);
if (!is_safe_path_component(name)) {
fuse_reply_err(req, EINVAL);
return;
}
+ inode = lookup_name(req, parent, name);
+ if (!inode) {
+ fuse_reply_err(req, EIO);
+ return;
+ }
+
res = unlinkat(lo_fd(req, parent), name, 0);
fuse_reply_err(req, res == -1 ? errno : 0);
+ unref_inode_lolocked(lo, inode, 1);
}
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
--
1.8.3.1

View File

@ -0,0 +1,211 @@
From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:52 +0100
Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-78-dgilbert@redhat.com>
Patchwork-id: 93528
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
Improve performance of inode lookup by using a hash table.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------
1 file changed, 45 insertions(+), 36 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b40f287..b176a31 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -84,13 +84,15 @@ struct lo_map {
ssize_t freelist;
};
+struct lo_key {
+ ino_t ino;
+ dev_t dev;
+};
+
struct lo_inode {
- struct lo_inode *next; /* protected by lo->mutex */
- struct lo_inode *prev; /* protected by lo->mutex */
int fd;
bool is_symlink;
- ino_t ino;
- dev_t dev;
+ struct lo_key key;
uint64_t refcount; /* protected by lo->mutex */
fuse_ino_t fuse_ino;
};
@@ -119,7 +121,8 @@ struct lo_data {
int timeout_set;
int readdirplus_set;
int readdirplus_clear;
- struct lo_inode root; /* protected by lo->mutex */
+ struct lo_inode root;
+ GHashTable *inodes; /* protected by lo->mutex */
struct lo_map ino_map; /* protected by lo->mutex */
struct lo_map dirp_map; /* protected by lo->mutex */
struct lo_map fd_map; /* protected by lo->mutex */
@@ -573,7 +576,7 @@ retry:
}
goto fail_unref;
}
- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) {
if (!retries) {
fuse_log(FUSE_LOG_WARNING,
"%s: failed to match last\n", __func__);
@@ -753,19 +756,20 @@ out_err:
static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
{
struct lo_inode *p;
- struct lo_inode *ret = NULL;
+ struct lo_key key = {
+ .ino = st->st_ino,
+ .dev = st->st_dev,
+ };
pthread_mutex_lock(&lo->mutex);
- for (p = lo->root.next; p != &lo->root; p = p->next) {
- if (p->ino == st->st_ino && p->dev == st->st_dev) {
- assert(p->refcount > 0);
- ret = p;
- ret->refcount++;
- break;
- }
+ p = g_hash_table_lookup(lo->inodes, &key);
+ if (p) {
+ assert(p->refcount > 0);
+ p->refcount++;
}
pthread_mutex_unlock(&lo->mutex);
- return ret;
+
+ return p;
}
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
close(newfd);
newfd = -1;
} else {
- struct lo_inode *prev, *next;
-
saverr = ENOMEM;
inode = calloc(1, sizeof(struct lo_inode));
if (!inode) {
@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
inode->refcount = 1;
inode->fd = newfd;
newfd = -1;
- inode->ino = e->attr.st_ino;
- inode->dev = e->attr.st_dev;
+ inode->key.ino = e->attr.st_ino;
+ inode->key.dev = e->attr.st_dev;
pthread_mutex_lock(&lo->mutex);
inode->fuse_ino = lo_add_inode_mapping(req, inode);
- prev = &lo->root;
- next = prev->next;
- next->prev = inode;
- inode->next = next;
- inode->prev = prev;
- prev->next = inode;
+ g_hash_table_insert(lo->inodes, &inode->key, inode);
pthread_mutex_unlock(&lo->mutex);
}
e->ino = inode->fuse_ino;
@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
assert(inode->refcount >= n);
inode->refcount -= n;
if (!inode->refcount) {
- struct lo_inode *prev, *next;
-
- prev = inode->prev;
- next = inode->next;
- next->prev = prev;
- prev->next = next;
-
lo_map_remove(&lo->ino_map, inode->fuse_ino);
+ g_hash_table_remove(lo->inodes, &inode->key);
pthread_mutex_unlock(&lo->mutex);
close(inode->fd);
free(inode);
@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
/* Hide root's parent directory */
if (dinode == &lo->root && strcmp(name, "..") == 0) {
- e.attr.st_ino = lo->root.ino;
+ e.attr.st_ino = lo->root.key.ino;
e.attr.st_mode = DT_DIR << 12;
}
@@ -2370,11 +2361,26 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
root->is_symlink = false;
root->fd = fd;
- root->ino = stat.st_ino;
- root->dev = stat.st_dev;
+ root->key.ino = stat.st_ino;
+ root->key.dev = stat.st_dev;
root->refcount = 2;
}
+static guint lo_key_hash(gconstpointer key)
+{
+ const struct lo_key *lkey = key;
+
+ return (guint)lkey->ino + (guint)lkey->dev;
+}
+
+static gboolean lo_key_equal(gconstpointer a, gconstpointer b)
+{
+ const struct lo_key *la = a;
+ const struct lo_key *lb = b;
+
+ return la->ino == lb->ino && la->dev == lb->dev;
+}
+
int main(int argc, char *argv[])
{
struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[])
umask(0);
pthread_mutex_init(&lo.mutex, NULL);
- lo.root.next = lo.root.prev = &lo.root;
+ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal);
lo.root.fd = -1;
lo.root.fuse_ino = FUSE_ROOT_ID;
lo.cache = CACHE_AUTO;
@@ -2522,6 +2528,9 @@ err_out2:
err_out1:
fuse_opt_free_args(&args);
+ if (lo.inodes) {
+ g_hash_table_destroy(lo.inodes);
+ }
lo_map_destroy(&lo.fd_map);
lo_map_destroy(&lo.dirp_map);
lo_map_destroy(&lo.ino_map);
--
1.8.3.1

View File

@ -0,0 +1,54 @@
From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:24 +0100
Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-50-dgilbert@redhat.com>
Patchwork-id: 93500
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." escape in lo_do_lookup()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e375406..79d5966 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
int res;
int saverr;
struct lo_data *lo = lo_data(req);
- struct lo_inode *inode;
+ struct lo_inode *inode, *dir = lo_inode(req, parent);
memset(e, 0, sizeof(*e));
e->attr_timeout = lo->timeout;
e->entry_timeout = lo->timeout;
+ /* Do not allow escaping root directory */
+ if (dir == &lo->root && strcmp(name, "..") == 0) {
+ name = ".";
+ }
+
newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW);
if (newfd == -1) {
goto out_err;
--
1.8.3.1

View File

@ -0,0 +1,108 @@
From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:25 +0100
Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-51-dgilbert@redhat.com>
Patchwork-id: 93507
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Construct a fake dirent for the root directory's ".." entry. This hides
the parent directory from the FUSE client.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Sergio Lopez <slp@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++--------------
1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 79d5966..e3d65c3 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1149,19 +1149,25 @@ out_err:
static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
off_t offset, struct fuse_file_info *fi, int plus)
{
+ struct lo_data *lo = lo_data(req);
struct lo_dirp *d;
+ struct lo_inode *dinode;
char *buf = NULL;
char *p;
size_t rem = size;
- int err = ENOMEM;
+ int err = EBADF;
- (void)ino;
+ dinode = lo_inode(req, ino);
+ if (!dinode) {
+ goto error;
+ }
d = lo_dirp(req, fi);
if (!d) {
goto error;
}
+ err = ENOMEM;
buf = calloc(1, size);
if (!buf) {
goto error;
@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
}
nextoff = d->entry->d_off;
name = d->entry->d_name;
+
fuse_ino_t entry_ino = 0;
+ struct fuse_entry_param e = (struct fuse_entry_param){
+ .attr.st_ino = d->entry->d_ino,
+ .attr.st_mode = d->entry->d_type << 12,
+ };
+
+ /* Hide root's parent directory */
+ if (dinode == &lo->root && strcmp(name, "..") == 0) {
+ e.attr.st_ino = lo->root.ino;
+ e.attr.st_mode = DT_DIR << 12;
+ }
+
if (plus) {
- struct fuse_entry_param e;
- if (is_dot_or_dotdot(name)) {
- e = (struct fuse_entry_param){
- .attr.st_ino = d->entry->d_ino,
- .attr.st_mode = d->entry->d_type << 12,
- };
- } else {
+ if (!is_dot_or_dotdot(name)) {
err = lo_do_lookup(req, ino, name, &e);
if (err) {
goto error;
@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
} else {
- struct stat st = {
- .st_ino = d->entry->d_ino,
- .st_mode = d->entry->d_type << 12,
- };
- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff);
+ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
}
if (entsize > rem) {
if (entry_ino != 0) {
--
1.8.3.1

View File

@ -0,0 +1,103 @@
From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:20 +0100
Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-106-dgilbert@redhat.com>
Patchwork-id: 93562
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
When running with multiple threads it can be tricky to handle
FUSE_INIT/FUSE_DESTROY in parallel with other request types or in
parallel with themselves. Serialize FUSE_INIT and FUSE_DESTROY so that
malicious clients cannot trigger race conditions.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_i.h | 1 +
tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++
2 files changed, 19 insertions(+)
diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h
index a20854f..1447d86 100644
--- a/tools/virtiofsd/fuse_i.h
+++ b/tools/virtiofsd/fuse_i.h
@@ -61,6 +61,7 @@ struct fuse_session {
struct fuse_req list;
struct fuse_req interrupts;
pthread_mutex_t lock;
+ pthread_rwlock_t init_rwlock;
int got_destroy;
int broken_splice_nonblock;
uint64_t notify_ctr;
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index dab6a31..79a4031 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se,
req->ctx.pid = in->pid;
req->ch = ch;
+ /*
+ * INIT and DESTROY requests are serialized, all other request types
+ * run in parallel. This prevents races between FUSE_INIT and ordinary
+ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and
+ * FUSE_DESTROY and FUSE_DESTROY.
+ */
+ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT ||
+ in->opcode == FUSE_DESTROY) {
+ pthread_rwlock_wrlock(&se->init_rwlock);
+ } else {
+ pthread_rwlock_rdlock(&se->init_rwlock);
+ }
+
err = EIO;
if (!se->got_init) {
enum fuse_opcode expected;
@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se,
} else {
fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter);
}
+
+ pthread_rwlock_unlock(&se->init_rwlock);
return;
reply_err:
fuse_reply_err(req, err);
+ pthread_rwlock_unlock(&se->init_rwlock);
}
#define LL_OPTION(n, o, v) \
@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se)
se->op.destroy(se->userdata);
}
}
+ pthread_rwlock_destroy(&se->init_rwlock);
pthread_mutex_destroy(&se->lock);
free(se->cuse_data);
if (se->fd != -1) {
@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args,
list_init_req(&se->list);
list_init_req(&se->interrupts);
fuse_mutex_init(&se->lock);
+ pthread_rwlock_init(&se->init_rwlock, NULL);
memcpy(&se->op, op, op_size);
se->owner = getuid();
--
1.8.3.1

View File

@ -0,0 +1,149 @@
From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:05 +0100
Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop()
races
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-91-dgilbert@redhat.com>
Patchwork-id: 93544
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
We call into libvhost-user from the virtqueue handler thread and the
vhost-user message processing thread without a lock. There is nothing
protecting the virtqueue handler thread if the vhost-user message
processing thread changes the virtqueue or memory table while it is
running.
This patch introduces a read-write lock. Virtqueue handler threads are
readers. The vhost-user message processing thread is a writer. This
will allow concurrency for multiqueue in the future while protecting
against fv_queue_thread() vs virtio_loop() races.
Note that the critical sections could be made smaller but it would be
more invasive and require libvhost-user changes. Let's start simple and
improve performance later, if necessary. Another option would be an
RCU-style approach with lighter-weight primitives.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index fb8d6d1..f6242f9 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -59,6 +59,18 @@ struct fv_VuDev {
struct fuse_session *se;
/*
+ * Either handle virtqueues or vhost-user protocol messages. Don't do
+ * both at the same time since that could lead to race conditions if
+ * virtqueues or memory tables change while another thread is accessing
+ * them.
+ *
+ * The assumptions are:
+ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev.
+ * 2. virtio_loop() reads/writes virtqueues and VuDev.
+ */
+ pthread_rwlock_t vu_dispatch_rwlock;
+
+ /*
* The following pair of fields are only accessed in the main
* virtio_loop
*/
@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque)
qi->qidx, qi->kick_fd);
while (1) {
struct pollfd pf[2];
+ int ret;
+
pf[0].fd = qi->kick_fd;
pf[0].events = POLLIN;
pf[0].revents = 0;
@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque)
fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n");
break;
}
+ /* Mutual exclusion with virtio_loop() */
+ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock);
+ assert(ret == 0); /* there is no possible error case */
/* out is from guest, in is too guest */
unsigned int in_bytes, out_bytes;
vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0);
@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque)
"%s: Queue %d gave evalue: %zx available: in: %u out: %u\n",
__func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
+
while (1) {
bool allocated_bufv = false;
struct fuse_bufvec bufv;
@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque)
free(elem);
elem = NULL;
}
+
+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock);
}
out:
pthread_mutex_destroy(&ch.lock);
@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se)
while (!fuse_session_exited(se)) {
struct pollfd pf[1];
+ bool ok;
+ int ret;
pf[0].fd = se->vu_socketfd;
pf[0].events = POLLIN;
pf[0].revents = 0;
@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se)
}
assert(pf[0].revents & POLLIN);
fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__);
- if (!vu_dispatch(&se->virtio_dev->dev)) {
+ /* Mutual exclusion with fv_queue_thread() */
+ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock);
+ assert(ret == 0); /* there is no possible error case */
+
+ ok = vu_dispatch(&se->virtio_dev->dev);
+
+ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock);
+
+ if (!ok) {
fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__);
break;
}
@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se)
se->vu_socketfd = data_sock;
se->virtio_dev->se = se;
+ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
fv_remove_watch, &fv_iface);
@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se)
}
free(se->virtio_dev->qi);
+ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock);
free(se->virtio_dev);
se->virtio_dev = NULL;
}
--
1.8.3.1

View File

@ -0,0 +1,147 @@
From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:02:07 +0100
Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-93-dgilbert@redhat.com>
Patchwork-id: 93546
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put()
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Stefan Hajnoczi <stefanha@redhat.com>
Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause
use-after-free races with other threads that are accessing lo_dirp.
Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with
itself. This prevents double-frees.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 690edbc..2d703b5 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
}
struct lo_dirp {
+ gint refcount;
DIR *dp;
struct dirent *entry;
off_t offset;
};
+static void lo_dirp_put(struct lo_dirp **dp)
+{
+ struct lo_dirp *d = *dp;
+
+ if (!d) {
+ return;
+ }
+ *dp = NULL;
+
+ if (g_atomic_int_dec_and_test(&d->refcount)) {
+ closedir(d->dp);
+ free(d);
+ }
+}
+
+/* Call lo_dirp_put() on the return value when no longer needed */
static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
{
struct lo_data *lo = lo_data(req);
@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
pthread_mutex_lock(&lo->mutex);
elem = lo_map_get(&lo->dirp_map, fi->fh);
+ if (elem) {
+ g_atomic_int_inc(&elem->dirp->refcount);
+ }
pthread_mutex_unlock(&lo->mutex);
if (!elem) {
return NULL;
@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
d->offset = 0;
d->entry = NULL;
+ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */
pthread_mutex_lock(&lo->mutex);
fh = lo_add_dirp_mapping(req, d);
pthread_mutex_unlock(&lo->mutex);
@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
off_t offset, struct fuse_file_info *fi, int plus)
{
struct lo_data *lo = lo_data(req);
- struct lo_dirp *d;
+ struct lo_dirp *d = NULL;
struct lo_inode *dinode;
char *buf = NULL;
char *p;
@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
err = 0;
error:
+ lo_dirp_put(&d);
+
/*
* If there's an error, we can only signal it if we haven't stored
* any entries yet - otherwise we'd end up with wrong lookup
@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
{
struct lo_data *lo = lo_data(req);
+ struct lo_map_elem *elem;
struct lo_dirp *d;
(void)ino;
- d = lo_dirp(req, fi);
- if (!d) {
+ pthread_mutex_lock(&lo->mutex);
+ elem = lo_map_get(&lo->dirp_map, fi->fh);
+ if (!elem) {
+ pthread_mutex_unlock(&lo->mutex);
fuse_reply_err(req, EBADF);
return;
}
- pthread_mutex_lock(&lo->mutex);
+ d = elem->dirp;
lo_map_remove(&lo->dirp_map, fi->fh);
pthread_mutex_unlock(&lo->mutex);
- closedir(d->dp);
- free(d);
+ lo_dirp_put(&d); /* paired with lo_opendir() */
+
fuse_reply_err(req, 0);
}
@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
} else {
res = fsync(fd);
}
+
+ lo_dirp_put(&d);
+
fuse_reply_err(req, res == -1 ? errno : 0);
}
--
1.8.3.1

View File

@ -0,0 +1,469 @@
From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:37 +0100
Subject: [PATCH 066/116] virtiofsd: print log only when priority is high
enough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-63-dgilbert@redhat.com>
Patchwork-id: 93518
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Eryu Guan <eguan@linux.alibaba.com>
Introduce "-o log_level=" command line option to specify current log
level (priority), valid values are "debug info warn err", e.g.
./virtiofsd -o log_level=debug ...
With this setting, only messages whose priority is "debug" or higher are
printed to stderr/syslog. The default level is info.
The "-o debug"/"-d" options are kept, and imply debug log level.
Signed-off-by: Eryu Guan <eguan@linux.alibaba.com>
dgilbert: Reworked for libfuse's log_func
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
with fix by:
Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++---------------
tools/virtiofsd/fuse_lowlevel.h | 1 +
tools/virtiofsd/helper.c | 8 ++-
tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++-----------------------
4 files changed, 87 insertions(+), 115 deletions(-)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 6ceb33d..a7a1968 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch,
struct fuse_out_header *out = iov[0].iov_base;
out->len = iov_length(iov, count);
- if (se->debug) {
- if (out->unique == 0) {
- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
- out->len);
- } else if (out->error) {
- fuse_log(FUSE_LOG_DEBUG,
- " unique: %llu, error: %i (%s), outsize: %i\n",
- (unsigned long long)out->unique, out->error,
- strerror(-out->error), out->len);
- } else {
- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n",
- (unsigned long long)out->unique, out->len);
- }
+ if (out->unique == 0) {
+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error,
+ out->len);
+ } else if (out->error) {
+ fuse_log(FUSE_LOG_DEBUG,
+ " unique: %llu, error: %i (%s), outsize: %i\n",
+ (unsigned long long)out->unique, out->error,
+ strerror(-out->error), out->len);
+ } else {
+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n",
+ (unsigned long long)out->unique, out->len);
}
if (fuse_lowlevel_is_virtio(se)) {
@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid,
return;
}
- if (se->debug) {
- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
- (unsigned long long)arg->unique);
- }
+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n",
+ (unsigned long long)arg->unique);
req->u.i.unique = arg->unique;
@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
}
}
- if (se->debug) {
- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
- if (arg->major == 7 && arg->minor >= 6) {
- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags);
- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n",
- arg->max_readahead);
- }
+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
+ if (arg->major == 7 && arg->minor >= 6) {
+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags);
+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead);
}
se->conn.proto_major = arg->major;
se->conn.proto_minor = arg->minor;
@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
outarg.congestion_threshold = se->conn.congestion_threshold;
outarg.time_gran = se->conn.time_gran;
- if (se->debug) {
- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major,
- outarg.minor);
- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n",
- outarg.max_readahead);
- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write);
- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n",
- outarg.max_background);
- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n",
- outarg.congestion_threshold);
- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran);
- }
+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead);
+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write);
+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background);
+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n",
+ outarg.congestion_threshold);
+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran);
send_reply_ok(req, &outarg, outargsize);
}
@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se,
in = fuse_mbuf_iter_advance(&iter, sizeof(*in));
assert(in); /* caller guarantees the input buffer is large enough */
- if (se->debug) {
- fuse_log(FUSE_LOG_DEBUG,
- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, "
- "pid: %u\n",
- (unsigned long long)in->unique,
- opname((enum fuse_opcode)in->opcode), in->opcode,
- (unsigned long long)in->nodeid, buf->size, in->pid);
- }
+ fuse_log(
+ FUSE_LOG_DEBUG,
+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n",
+ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode),
+ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid);
req = fuse_ll_alloc_req(se);
if (req == NULL) {
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index f2750bc..138041e 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts {
int show_help;
int print_capabilities;
int syslog;
+ int log_level;
unsigned int max_idle_threads;
};
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 9692ef9..6d50a46 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -34,7 +34,6 @@
t, offsetof(struct fuse_cmdline_opts, p), v \
}
-
static const struct fuse_opt fuse_helper_opts[] = {
FUSE_HELPER_OPT("-h", show_help),
FUSE_HELPER_OPT("--help", show_help),
@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = {
FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP),
FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads),
FUSE_HELPER_OPT("--syslog", syslog),
+ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG),
+ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO),
+ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING),
+ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR),
FUSE_OPT_END
};
@@ -142,6 +145,9 @@ void fuse_cmdline_help(void)
" --syslog log to syslog (default stderr)\n"
" -f foreground operation\n"
" --daemonize run in background\n"
+ " -o log_level=<level> log level, default to \"info\"\n"
+ " level could be one of \"debug, "
+ "info, warn, err\"\n"
" -o max_idle_threads the maximum number of idle worker "
"threads\n"
" allowed (default: 10)\n"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 0372aca..ff6910f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -37,6 +37,7 @@
#include "qemu/osdep.h"
#include "fuse_virtio.h"
+#include "fuse_log.h"
#include "fuse_lowlevel.h"
#include <assert.h>
#include <cap-ng.h>
@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = {
FUSE_OPT_END
};
static bool use_syslog = false;
+static int current_log_level;
static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino)
return inode ? inode->fd : -1;
}
-static bool lo_debug(fuse_req_t req)
-{
- return lo_data(req)->debug != 0;
-}
-
static void lo_init(void *userdata, struct fuse_conn_info *conn)
{
struct lo_data *lo = (struct lo_data *)userdata;
@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
}
if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
- if (lo->debug) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
conn->want |= FUSE_CAP_WRITEBACK_CACHE;
}
if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) {
- if (lo->debug) {
- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
conn->want |= FUSE_CAP_FLOCK_LOCKS;
}
}
@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
}
e->ino = inode->fuse_ino;
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
- (unsigned long long)parent, name, (unsigned long long)e->ino);
- }
+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
+ name, (unsigned long long)e->ino);
return 0;
@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
struct fuse_entry_param e;
int err;
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n",
- parent, name);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent,
+ name);
/*
* Don't use is_safe_path_component(), allow "." and ".." for NFS export
@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
goto out;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
- (unsigned long long)parent, name, (unsigned long long)e.ino);
- }
+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
+ name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
return;
@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
pthread_mutex_unlock(&lo->mutex);
e.ino = inode->fuse_ino;
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
- (unsigned long long)parent, name, (unsigned long long)e.ino);
- }
+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
+ name, (unsigned long long)e.ino);
fuse_reply_entry(req, &e);
return;
@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
return;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
- (unsigned long long)ino, (unsigned long long)inode->refcount,
- (unsigned long long)nlookup);
- }
+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
+ (unsigned long long)ino, (unsigned long long)inode->refcount,
+ (unsigned long long)nlookup);
unref_inode(lo, inode, nlookup);
}
@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
int err;
struct lo_cred old = {};
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n",
- parent, name);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent,
+ name);
if (!is_safe_path_component(name)) {
fuse_reply_err(req, EINVAL);
@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
char buf[64];
struct lo_data *lo = lo_data(req);
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
- fi->flags);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
+ fi->flags);
/*
* With writeback cache, kernel may send read requests even
@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
{
struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG,
- "lo_read(ino=%" PRIu64 ", size=%zd, "
- "off=%lu)\n",
- ino, size, (unsigned long)offset);
- }
+ fuse_log(FUSE_LOG_DEBUG,
+ "lo_read(ino=%" PRIu64 ", size=%zd, "
+ "off=%lu)\n",
+ ino, size, (unsigned long)offset);
buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
buf.buf[0].fd = lo_fi_fd(req, fi);
@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
out_buf.buf[0].fd = lo_fi_fd(req, fi);
out_buf.buf[0].pos = off;
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG,
- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
- out_buf.buf[0].size, (unsigned long)off);
- }
+ fuse_log(FUSE_LOG_DEBUG,
+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
+ out_buf.buf[0].size, (unsigned long)off);
/*
* If kill_priv is set, drop CAP_FSETID which should lead to kernel
@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
goto out;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG,
- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name,
- size);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
+ ino, name, size);
if (inode->is_symlink) {
/* Sorry, no race free way to getxattr on symlink. */
@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
goto out;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n",
- ino, size);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
+ size);
if (inode->is_symlink) {
/* Sorry, no race free way to listxattr on symlink. */
@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
goto out;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG,
- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n",
- ino, name, value, size);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
+ ", name=%s value=%s size=%zd)\n", ino, name, value, size);
if (inode->is_symlink) {
/* Sorry, no race free way to setxattr on symlink. */
@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
goto out;
}
- if (lo_debug(req)) {
- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n",
- ino, name);
- }
+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
+ name);
if (inode->is_symlink) {
/* Sorry, no race free way to setxattr on symlink. */
@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void)
static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
{
+ if (current_log_level < level) {
+ return;
+ }
+
if (use_syslog) {
int priority = LOG_ERR;
switch (level) {
@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[])
return 1;
}
+ /*
+ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG,
+ * and we don't use this log level).
+ */
+ if (opts.log_level != 0) {
+ current_log_level = opts.log_level;
+ }
lo.debug = opts.debug;
+ if (lo.debug) {
+ current_log_level = FUSE_LOG_DEBUG;
+ }
lo.root.refcount = 2;
+
if (lo.source) {
struct stat stat;
int res;
--
1.8.3.1

Some files were not shown because too many files have changed in this diff Show More