qemu-kvm/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch
Miroslav Rezanina a5bd08701a * Fri Jan 31 2020 Miroslav Rezanina <mrezanin@redhat.com> - 4.2.0-8.el8
- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084]
- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041]
- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041]
- kvm-vhost-coding-style-fix.patch [bz#1779041]
- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164]
- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164]
- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164]
- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164]
- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164]
- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164]
- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164]
- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164]
- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164]
- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164]
- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164]
- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164]
- kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164]
- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164]
- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164]
- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164]
- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164]
- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164]
- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164]
- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164]
- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164]
- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164]
- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164]
- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164]
- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164]
- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164]
- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164]
- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164]
- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164]
- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164]
- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164]
- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164]
- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164]
- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164]
- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164]
- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164]
- kvm-virtiofsd-validate-path-components.patch [bz#1694164]
- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164]
- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164]
- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164]
- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164]
- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164]
- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164]
- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164]
- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164]
- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164]
- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164]
- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164]
- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164]
- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164]
- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164]
- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164]
- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164]
- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164]
- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164]
- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164]
- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164]
- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164]
- kvm-virtiofsd-Handle-reinit.patch [bz#1694164]
- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164]
- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164]
- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164]
- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164]
- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164]
- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164]
- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164]
- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164]
- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164]
- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164]
- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164]
- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164]
- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164]
- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164]
- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164]
- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164]
- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164]
- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164]
- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164]
- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164]
- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164]
- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164]
- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164]
- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164]
- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164]
- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164]
- kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164]
- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164]
- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164]
- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164]
- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164]
- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164]
- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164]
- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164]
- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164]
- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164]
- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164]
- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164]
- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164]
- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164]
- Resolves: bz#1694164
  (virtio-fs: host<->guest shared file system (qemu))
- Resolves: bz#1725084
  (aarch64: support dumping SVE registers)
- Resolves: bz#1779041
  (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic)
2020-01-31 11:12:06 +01:00

304 lines
9.8 KiB
Diff

From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Mon, 27 Jan 2020 19:01:17 +0100
Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-id: <20200127190227.40942-43-dgilbert@redhat.com>
Patchwork-id: 93496
O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops
Bugzilla: 1694164
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Sergio Lopez Pascual <slp@redhat.com>
From: Miklos Szeredi <mszeredi@redhat.com>
We have two operations that cannot be done race-free on a symlink in
certain cases: utimes and link.
Add a racy fallback for these if the race-free method doesn't work. We do
our best to avoid races even in this case:
- get the absolute path by reading the /proc/self/fd/NN symlink
- look up the parent directory: after this we are safe against renames in
  ancestors
- look up the name in the parent directory and verify that we got to the
  original inode; if not, retry the whole thing
Both utimes(2) and link(2) hold i_lock on the inode across the operation,
so a racing rename/delete by this fuse instance is not possible, only from
other entities changing the filesystem.
If the "norace" option is given, then disable the racy fallbacks.
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
tools/virtiofsd/helper.c | 5 +-
tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++----
2 files changed, 145 insertions(+), 17 deletions(-)
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index b8ec5ac..5531425 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -142,7 +142,10 @@ void fuse_cmdline_help(void)
" --daemonize run in background\n"
" -o max_idle_threads the maximum number of idle worker "
"threads\n"
- " allowed (default: 10)\n");
+ " allowed (default: 10)\n"
+ " -o norace disable racy fallback\n"
+ " default: false\n"
+ );
}
static int fuse_helper_opt_proc(void *data, const char *arg, int key,
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9815bfa..ac380ef 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -98,6 +98,7 @@ enum {
struct lo_data {
pthread_mutex_t mutex;
int debug;
+ int norace;
int writeback;
int flock;
int xattr;
@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = {
{ "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
{ "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
{ "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
-
+ { "norace", offsetof(struct lo_data, norace), 1 },
FUSE_OPT_END
};
+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);
+
+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
+
+
static struct lo_data *lo_data(fuse_req_t req)
{
return (struct lo_data *)fuse_req_userdata(req);
@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
fuse_reply_attr(req, &buf, lo->timeout);
}
-static int utimensat_empty_nofollow(struct lo_inode *inode,
- const struct timespec *tv)
+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
+ char path[PATH_MAX], struct lo_inode **parent)
{
- int res;
char procname[64];
+ char *last;
+ struct stat stat;
+ struct lo_inode *p;
+ int retries = 2;
+ int res;
+
+retry:
+ sprintf(procname, "/proc/self/fd/%i", inode->fd);
+
+ res = readlink(procname, path, PATH_MAX);
+ if (res < 0) {
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
+ goto fail_noretry;
+ }
+
+ if (res >= PATH_MAX) {
+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
+ goto fail_noretry;
+ }
+ path[res] = '\0';
+
+ last = strrchr(path, '/');
+ if (last == NULL) {
+ /* Shouldn't happen */
+ fuse_log(
+ FUSE_LOG_WARNING,
+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
+ goto fail_noretry;
+ }
+ if (last == path) {
+ p = &lo->root;
+ pthread_mutex_lock(&lo->mutex);
+ p->refcount++;
+ pthread_mutex_unlock(&lo->mutex);
+ } else {
+ *last = '\0';
+ res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
+ if (res == -1) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to stat parent: %m\n", __func__);
+ }
+ goto fail;
+ }
+ p = lo_find(lo, &stat);
+ if (p == NULL) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to find parent\n", __func__);
+ }
+ goto fail;
+ }
+ }
+ last++;
+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
+ if (res == -1) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to stat last\n", __func__);
+ }
+ goto fail_unref;
+ }
+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
+ if (!retries) {
+ fuse_log(FUSE_LOG_WARNING,
+ "%s: failed to match last\n", __func__);
+ }
+ goto fail_unref;
+ }
+ *parent = p;
+ memmove(path, last, strlen(last) + 1);
+
+ return 0;
+
+fail_unref:
+ unref_inode(lo, p, 1);
+fail:
+ if (retries) {
+ retries--;
+ goto retry;
+ }
+fail_noretry:
+ errno = EIO;
+ return -1;
+}
+
+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
+ const struct timespec *tv)
+{
+ int res;
+ struct lo_inode *parent;
+ char path[PATH_MAX];
if (inode->is_symlink) {
- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
if (res == -1 && errno == EINVAL) {
/* Sorry, no race free way to set times on symlink. */
- errno = EPERM;
+ if (lo->norace) {
+ errno = EPERM;
+ } else {
+ goto fallback;
+ }
}
return res;
}
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
- return utimensat(AT_FDCWD, procname, tv, 0);
+ return utimensat(AT_FDCWD, path, tv, 0);
+
+fallback:
+ res = lo_parent_and_name(lo, inode, path, &parent);
+ if (res != -1) {
+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
+ unref_inode(lo, parent, 1);
+ }
+
+ return res;
}
static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
{
int saverr;
char procname[64];
+ struct lo_data *lo = lo_data(req);
struct lo_inode *inode;
int ifd;
int res;
@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
if (fi) {
res = futimens(fd, tv);
} else {
- res = utimensat_empty_nofollow(inode, tv);
+ res = utimensat_empty(lo, inode, tv);
}
if (res == -1) {
goto out_err;
@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
}
-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd,
- const char *name)
+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
+ int dfd, const char *name)
{
int res;
- char procname[64];
+ struct lo_inode *parent;
+ char path[PATH_MAX];
if (inode->is_symlink) {
res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
/* Sorry, no race free way to hard-link a symlink. */
- errno = EPERM;
+ if (lo->norace) {
+ errno = EPERM;
+ } else {
+ goto fallback;
+ }
}
return res;
}
- sprintf(procname, "/proc/self/fd/%i", inode->fd);
+ sprintf(path, "/proc/self/fd/%i", inode->fd);
+
+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW);
+
+fallback:
+ res = lo_parent_and_name(lo, inode, path, &parent);
+ if (res != -1) {
+ res = linkat(parent->fd, path, dfd, name, 0);
+ unref_inode(lo, parent, 1);
+ }
- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
+ return res;
}
static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
e.attr_timeout = lo->timeout;
e.entry_timeout = lo->timeout;
- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name);
+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
if (res == -1) {
goto out_err;
}
--
1.8.3.1