diff --git a/.gitignore b/.gitignore index 713ad2e..ba7d4aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,13 @@ -/qemu-*.tar.xz +/qemu-3.1.0.tar.xz +/qemu-4.0.0.tar.xz +/qemu-4.1.0-rc4.tar.xz +/qemu-4.1.0.tar.xz +/qemu-4.2.0-rc1.tar.xz +/qemu-4.2.0-rc4.tar.xz +/qemu-4.2.0.tar.xz +/qemu-5.0.0-rc0.tar.xz +/qemu-5.0.0-rc1.tar.xz +/qemu-5.0.0-rc2.tar.xz +/qemu-5.0.0-rc3.tar.xz +/qemu-5.0.0-rc4.tar.xz +/qemu-5.0.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..04e73be --- /dev/null +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,16975 @@ +From e4d185c8c4efbf15a9380c1433bc66b49a09e79d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 23 Apr 2020 05:26:54 +0200 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unnoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still work (they're based on +the tarball content). + +As we're receiving some CVEs against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling it as a separate package (as we do for +firmwares). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. 
+ +Since "make -C redhat rh-brew" uses the tarball and applies all the +patches on top of it, we need to remove the folder from the tarball before applying +the patches (because we are actually re-applying them). + +We also need to use --ignore-submodules while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad at. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (5.0.0-rc4): + - Update slirp directory to commit 2faae0f778 (used upstream) +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 10 + + slirp/.gitlab-ci.yml | 27 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 88 +++ + slirp/COPYRIGHT | 62 ++ + slirp/Makefile | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/build-aux/meson-dist | 16 + + slirp/meson.build | 134 ++++ + slirp/src/arp_table.c | 91 +++ + slirp/src/bootp.c | 369 ++++++++++ + slirp/src/bootp.h | 129 ++++ + slirp/src/cksum.c | 179 +++++ + slirp/src/debug.h | 51 ++ + slirp/src/dhcpv6.c | 224 ++++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++++ + slirp/src/if.c | 213 ++++++ + slirp/src/if.h | 25 + + slirp/src/ip.h | 242 ++++++ + slirp/src/ip6.h | 214 ++++++ + slirp/src/ip6_icmp.c | 434 +++++++++++ + slirp/src/ip6_icmp.h | 219 ++++++ + slirp/src/ip6_input.c | 78 ++ + slirp/src/ip6_output.c | 39 + + slirp/src/ip_icmp.c | 489 +++++++++++++ + slirp/src/ip_icmp.h | 166 +++++ + slirp/src/ip_input.c | 461 ++++++++++++ + slirp/src/ip_output.c | 169 +++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 171 +++++ + slirp/src/libslirp.map | 30 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 224 ++++++ + slirp/src/mbuf.h | 127 ++++ + slirp/src/misc.c | 390 
++++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++++ + slirp/src/ncsi.c | 192 +++++ + slirp/src/ndp_table.c | 87 +++ + slirp/src/sbuf.c | 168 +++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1185 ++++++++++++++++++++++++++++++ + slirp/src/slirp.h | 283 +++++++ + slirp/src/socket.c | 957 ++++++++++++++++++++++++ + slirp/src/socket.h | 164 +++++ + slirp/src/state.c | 379 ++++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 +++++ + slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++++ + slirp/src/tcp_subr.c | 980 +++++++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++++ + slirp/src/tcp_timer.h | 130 ++++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 462 ++++++++++++ + slirp/src/tftp.h | 52 ++ + slirp/src/udp.c | 361 +++++++++ + slirp/src/udp.h | 90 +++ + slirp/src/udp6.c | 173 +++++ + slirp/src/util.c | 428 +++++++++++ + slirp/src/util.h | 189 +++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++++ + slirp/src/vmstate.h | 391 ++++++++++ + 70 files changed, 16423 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/Makefile + create mode 100644 slirp/README.md + create mode 100755 slirp/build-aux/git-version-gen + create mode 100755 slirp/build-aux/meson-dist + create mode 100644 slirp/meson.build + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 
slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git 
a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000..17fb49f +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? 
++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000..67b0a74 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,88 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [Unreleased] ++ ++### Added ++ ++### Changed ++ ++### Deprecated ++ ++### Fixed ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. ++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New SlirpConfig.outbound_addr{,6} fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if get_dns_addr() failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. ++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. 
`tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address. ++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. ++ ++[unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...master ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000..ed49512 +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). 
This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. 
++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. ++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/Makefile b/slirp/Makefile +new file mode 100644 +index 0000000..8857b41 +--- /dev/null ++++ b/slirp/Makefile +@@ -0,0 +1,62 @@ ++ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) ++BUILD_DIR ?= . ++ ++LIBSLIRP = $(BUILD_DIR)/libslirp.a ++SLIRP_MAJOR_VERSION = 4 ++SLIRP_MINOR_VERSION = 2 ++SLIRP_MICRO_VERSION = 0 ++SLIRP_VERSION_STRING = "$(SLIRP_MAJOR_VERSION).$(SLIRP_MINOR_VERSION).$(SLIRP_MICRO_VERSION)-git" ++ ++all: $(LIBSLIRP) ++ ++SRCS := $(wildcard src/*.c) ++OBJS := $(SRCS:%.c=$(BUILD_DIR)/%.o) ++DEPS := $(OBJS:%.o=%.d) ++ ++INC_DIRS := $(BUILD_DIR)/src ++INC_FLAGS := $(addprefix -I,$(INC_DIRS)) ++ ++override CFLAGS += \ ++ -DG_LOG_DOMAIN='"Slirp"' \ ++ $(shell $(PKG_CONFIG) --cflags glib-2.0) \ ++ $(INC_FLAGS) \ ++ -MMD -MP ++override LDFLAGS += $(shell $(PKG_CONFIG) --libs glib-2.0) ++ ++$(BUILD_DIR)/src/libslirp-version.h: Makefile ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,cat $(ROOT_DIR)/src/libslirp-version.h.in | \ ++ sed 's/@SLIRP_MAJOR_VERSION@/$(SLIRP_MAJOR_VERSION)/' | \ ++ sed 's/@SLIRP_MINOR_VERSION@/$(SLIRP_MINOR_VERSION)/' | \ ++ sed 's/@SLIRP_MICRO_VERSION@/$(SLIRP_MICRO_VERSION)/' | \ ++ sed 's/@SLIRP_VERSION_STRING@/$(SLIRP_VERSION_STRING)/' \ ++ > $@,"GEN","$@") ++ ++$(OBJS): $(BUILD_DIR)/src/libslirp-version.h ++ ++$(LIBSLIRP): $(OBJS) ++ ++.PHONY: clean ++ ++clean: ++ rm -r $(OBJS) $(DEPS) $(LIBSLIRP) $(BUILD_DIR)/src/libslirp-version.h ++ 
++$(BUILD_DIR)/src/%.o: $(ROOT_DIR)/src/%.c ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,$(CC) $(CFLAGS) -c -o $@ $<,"CC","$@") ++ ++%.a: ++ $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$@") ++ ++PKG_CONFIG ?= pkg-config ++MKDIR_P ?= mkdir -p ++quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 ++quiet-@ = $(if $(V),,@) ++quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3) ++ ++print-%: ++ @echo '$*=$($*)' ++ ++.SUFFIXES: ++ ++-include $(DEPS) +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000..dc11e5f +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, make/meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. 
++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. +diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000..5617eb8 +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version". 
++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. ++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball. Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. 
++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. ++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. 
++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. ++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist +new file mode 100755 +index 0000000..80d534f +--- /dev/null ++++ b/slirp/build-aux/meson-dist +@@ -0,0 +1,16 @@ ++#!/bin/bash ++ ++set -e ++set -o pipefail ++ ++if test "$1" = ""; then ++ echo "Version not provided" >&2 ++ exit 1 ++fi ++if ! 
test -d "$2"; then ++ echo "Source directory not provided" >&2 ++ exit 1 ++fi ++ ++# generate tarball version ++echo "$1" > "$MESON_DIST_ROOT/.tarball-version" +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000..3a27149 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,134 @@ ++project('libslirp', 'c', ++ version : run_command('build-aux/git-version-gen', '@0@/.tarball-version'.format(meson.source_root()), check : true).stdout().strip(), ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.49', ++) ++ ++meson.add_dist_script('build-aux/meson-dist', meson.project_version(), meson.source_root()) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++conf.set_quoted('SLIRP_VERSION_STRING', version) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 ++lt_current = 2 ++lt_revision = 0 ++lt_age = 2 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ 
cc.find_library('iphlpapi') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : true ++) ++ ++libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++ ++install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++pkg = import('pkgconfig') ++ ++pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++) +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000..054fbf5 +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this 
software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], 
out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000..46e9681 +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,369 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. */ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) 
DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p, *p_end; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ p_end = p + DHCP_OPT_LEN; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < p_end) { ++ tag = p[0]; ++ if (tag == RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= p_end) ++ break; ++ len = *p++; ++ if (p + len > p_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, 
len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, 
&preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = (uint8_t *)&rbp[1]; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search 
option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod(m, struct bootp_t *); ++ ++ if (bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000..a57fa51 +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 
22
++#define RFC1533_IPTTL 23
++#define RFC1533_IPMTU 24
++#define RFC1533_IPMTUPLATEAU 25
++#define RFC1533_INTMTU 26
++#define RFC1533_INTLOCALSUBNETS 27
++#define RFC1533_INTBROADCAST 28
++#define RFC1533_INTICMPDISCOVER 29
++#define RFC1533_INTICMPRESPOND 30
++#define RFC1533_INTROUTEDISCOVER 31
++#define RFC1533_INTROUTESOLICIT 32
++#define RFC1533_INTSTATICROUTES 33
++#define RFC1533_LLTRAILERENCAP 34
++#define RFC1533_LLARPCACHETMO 35
++#define RFC1533_LLETHERNETENCAP 36
++#define RFC1533_TCPTTL 37
++#define RFC1533_TCPKEEPALIVETMO 38
++#define RFC1533_TCPKEEPALIVEGB 39
++#define RFC1533_NISDOMAIN 40
++#define RFC1533_NISSERVER 41
++#define RFC1533_NTPSERVER 42
++#define RFC1533_VENDOR 43
++#define RFC1533_NBNS 44
++#define RFC1533_NBDD 45
++#define RFC1533_NBNT 46
++#define RFC1533_NBSCOPE 47
++#define RFC1533_XFS 48
++#define RFC1533_XDM 49
++/* Option tags with the RFC2132_ prefix */
++#define RFC2132_REQ_ADDR 50
++#define RFC2132_LEASE_TIME 51
++#define RFC2132_MSG_TYPE 53
++#define RFC2132_SRV_ID 54
++#define RFC2132_PARAM_LIST 55
++#define RFC2132_MESSAGE 56
++#define RFC2132_MAX_SIZE 57
++#define RFC2132_RENEWAL_TIME 58
++#define RFC2132_REBIND_TIME 59
++#define RFC2132_TFTP_SERVER_NAME 66
++/* Message types carried as the value of option RFC2132_MSG_TYPE */
++#define DHCPDISCOVER 1
++#define DHCPOFFER 2
++#define DHCPREQUEST 3
++#define DHCPACK 5
++#define DHCPNAK 6
++
++#define RFC1533_VENDOR_MAJOR 0
++#define RFC1533_VENDOR_MINOR 0
++
++#define RFC1533_VENDOR_MAGIC 128
++#define RFC1533_VENDOR_ADDPARM 129
++#define RFC1533_VENDOR_ETHDEV 130
++#define RFC1533_VENDOR_HOWTO 132
++#define RFC1533_VENDOR_MNUOPTS 160
++#define RFC1533_VENDOR_SELECTION 176
++#define RFC1533_VENDOR_MOTD 184
++#define RFC1533_VENDOR_NUMOFMOTD 8
++#define RFC1533_VENDOR_IMG 192
++#define RFC1533_VENDOR_NUMOFIMG 16
++
++#define RFC1533_END 255
++#define BOOTP_VENDOR_LEN 64
++#define DHCP_OPT_LEN 312
++/* One BOOTP/DHCP packet, laid out together with its IP and UDP headers. */
++struct bootp_t {
++    struct ip ip;
++    struct udphdr udp;
++    uint8_t bp_op; /* BOOTP_REQUEST or BOOTP_REPLY */
++    uint8_t bp_htype;
++    uint8_t bp_hlen;
++    uint8_t bp_hops;
++    uint32_t bp_xid; /* copied from the request into the reply */
++    uint16_t bp_secs;
++    uint16_t unused;
++    struct in_addr bp_ciaddr;
++    struct in_addr bp_yiaddr; /* client IP address in a reply */
++    struct in_addr bp_siaddr; /* server IP address in a reply */
++    struct in_addr bp_giaddr;
++    uint8_t bp_hwaddr[16]; /* client hardware address */
++    uint8_t bp_sname[64];
++    char bp_file[128]; /* boot file name */
++    uint8_t bp_vend[DHCP_OPT_LEN]; /* RFC1533_COOKIE, then tagged options */
++};
++/* One address slot of the DHCP pool and the MAC that owns it. */
++typedef struct {
++    uint16_t allocated; /* non-zero once the slot has been handed out */
++    uint8_t macaddr[6];
++} BOOTPClient;
++/* Number of BOOTPClient slots in the pool */
++#define NB_BOOTP_CLIENTS 16
++
++void bootp_input(struct mbuf *m);
++
++#endif
+diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c
+new file mode 100644
+index 0000000..4d08380
+--- /dev/null
++++ b/slirp/src/cksum.c
+@@ -0,0 +1,179 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1988, 1992, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its contributors
++ * may be used to endorse or promote products derived from this software
++ * without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ *
++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp
++ */
++
++#include "slirp.h"
++
++/*
++ * Checksum routine for Internet Protocol family headers (Portable Version).
++ *
++ * This routine is very heavily used in the network
++ * code and should be modified for each CPU to be as fast as possible.
++ *
++ * XXX Since we will never span more than 1 mbuf, we can optimise this
++ */
++/* Both macros expect locals named sum and l_util in the expanding scope. */
++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
++#define REDUCE \
++    { \
++        l_util.l = sum; \
++        sum = l_util.s[0] + l_util.s[1]; \
++        (void)ADDCARRY(sum); \
++    }
++/* Sum min(len, m->m_len) bytes of m as 16-bit words; return ~sum & 0xffff. */
++int cksum(struct mbuf *m, int len)
++{
++    register uint16_t *w;
++    register int sum = 0;
++    register int mlen = 0;
++    int byte_swapped = 0;
++
++    union {
++        uint8_t c[2];
++        uint16_t s;
++    } s_util;
++    union {
++        uint16_t s[2];
++        uint32_t l;
++    } l_util;
++
++    if (m->m_len == 0)
++        goto cont;
++    w = mtod(m, uint16_t *);
++
++    mlen = m->m_len;
++
++    if (len < mlen)
++        mlen = len;
++    len -= mlen; /* non-zero afterwards means the caller asked for too much */
++    /*
++     * Force to even boundary.
++     */
++    if ((1 & (uintptr_t)w) && (mlen > 0)) {
++        REDUCE;
++        sum <<= 8;
++        s_util.c[0] = *(uint8_t *)w;
++        w = (uint16_t *)((int8_t *)w + 1);
++        mlen--;
++        byte_swapped = 1;
++    }
++    /*
++     * Unroll the loop to make overhead from
++     * branches &c small.
++     */
++    while ((mlen -= 32) >= 0) {
++        sum += w[0];
++        sum += w[1];
++        sum += w[2];
++        sum += w[3];
++        sum += w[4];
++        sum += w[5];
++        sum += w[6];
++        sum += w[7];
++        sum += w[8];
++        sum += w[9];
++        sum += w[10];
++        sum += w[11];
++        sum += w[12];
++        sum += w[13];
++        sum += w[14];
++        sum += w[15];
++        w += 16;
++    }
++    mlen += 32;
++    while ((mlen -= 8) >= 0) {
++        sum += w[0];
++        sum += w[1];
++        sum += w[2];
++        sum += w[3];
++        w += 4;
++    }
++    mlen += 8;
++    if (mlen == 0 && byte_swapped == 0)
++        goto cont;
++    REDUCE;
++    while ((mlen -= 2) >= 0) {
++        sum += *w++;
++    }
++    /* mlen is now -1 if one byte is left over, -2 if none */
++    if (byte_swapped) {
++        REDUCE;
++        sum <<= 8;
++        if (mlen == -1) {
++            s_util.c[1] = *(uint8_t *)w;
++            sum += s_util.s;
++            mlen = 0;
++        } else
++
++            mlen = -1;
++    } else if (mlen == -1)
++        s_util.c[0] = *(uint8_t *)w;
++
++cont:
++    if (len) {
++        DEBUG_ERROR("cksum: out of data");
++        DEBUG_ERROR(" len = %d", len);
++    }
++    if (mlen == -1) {
++        /* The last mbuf has odd # of bytes. Follow the
++           standard (the odd byte may be shifted left by 8 bits
++           or not as determined by endian-ness of the machine) */
++        s_util.c[1] = 0;
++        sum += s_util.s;
++    }
++    REDUCE;
++    return (~sum & 0xffff);
++}
++/* Checksum m with its IPv6 header temporarily replaced by a pseudo-header. */
++int ip6_cksum(struct mbuf *m)
++{
++    /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum
++     * separately from the mbuf */
++    struct ip6 save_ip, *ip = mtod(m, struct ip6 *);
++    struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *);
++    int sum;
++
++    save_ip = *ip;
++    /* ih aliases ip, so every field is read from the saved copy */
++    ih->ih_src = save_ip.ip_src;
++    ih->ih_dst = save_ip.ip_dst;
++    ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl));
++    ih->ih_zero_hi = 0;
++    ih->ih_zero_lo = 0;
++    ih->ih_nh = save_ip.ip_nh;
++
++    sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl));
++    /* put the real header back before returning */
++    *ip = save_ip;
++
++    return sum;
++}
+diff --git a/slirp/src/debug.h b/slirp/src/debug.h
+new file mode 100644
+index 0000000..47712bd
+--- /dev/null
++++ b/slirp/src/debug.h
+@@ -0,0 +1,51 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ *
Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#ifndef DEBUG_H_
++#define DEBUG_H_
++/* Bit flags tested against slirp_debug by the DEBUG_* macros below */
++#define DBG_CALL (1 << 0)
++#define DBG_MISC (1 << 1)
++#define DBG_ERROR (1 << 2)
++#define DBG_TFTP (1 << 3)
++
++extern int slirp_debug;
++/* Logs fmt followed by "..." through g_debug() when DBG_CALL is set */
++#define DEBUG_CALL(fmt, ...) \
++    do { \
++        if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \
++            g_debug(fmt "...", ##__VA_ARGS__); \
++        } \
++    } while (0)
++/* Logs fmt with a " " prefix; gated by DBG_CALL, the same bit as DEBUG_CALL */
++#define DEBUG_ARG(fmt, ...) \
++    do { \
++        if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \
++            g_debug(" " fmt, ##__VA_ARGS__); \
++        } \
++    } while (0)
++/* Logs fmt through g_debug() when DBG_MISC is set */
++#define DEBUG_MISC(fmt, ...) \
++    do { \
++        if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \
++            g_debug(fmt, ##__VA_ARGS__); \
++        } \
++    } while (0)
++/* Logs fmt through g_debug() when DBG_ERROR is set */
++#define DEBUG_ERROR(fmt, ...) \
++    do { \
++        if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \
++            g_debug(fmt, ##__VA_ARGS__); \
++        } \
++    } while (0)
++/* Logs fmt through g_debug() when DBG_TFTP is set */
++#define DEBUG_TFTP(fmt, ...) \
++    do { \
++        if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \
++            g_debug(fmt, ##__VA_ARGS__); \
++        } \
++    } while (0)
++
++#endif /* DEBUG_H_ */
+diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c
+new file mode 100644
+index 0000000..77b451b
+--- /dev/null
++++ b/slirp/src/dhcpv6.c
+@@ -0,0 +1,224 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * SLIRP stateless DHCPv6
++ *
++ * We only support stateless DHCPv6, e.g. for network booting.
++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details.
++ *
++ * Copyright 2016 Thomas Huth, Red Hat Inc.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * 2. Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials provided
++ * with the distribution.
++ *
++ * 3. Neither the name of the copyright holder nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include "slirp.h"
++#include "dhcpv6.h"
++
++/* DHCPv6 message types */
++#define MSGTYPE_REPLY 7
++#define MSGTYPE_INFO_REQUEST 11
++
++/* DHCPv6 option types */
++#define OPTION_CLIENTID 1
++#define OPTION_IAADDR 5
++#define OPTION_ORO 6
++#define OPTION_DNS_SERVERS 23
++#define OPTION_BOOTFILE_URL 59
++/* What one information-request asked for; filled in by the parser. */
++struct requested_infos {
++    uint8_t *client_id; /* points into the request buffer, NULL if absent */
++    int client_id_len; /* length of client_id in bytes */
++    bool want_dns; /* OPTION_DNS_SERVERS was requested */
++    bool want_boot_url; /* OPTION_BOOTFILE_URL was requested */
++};
++
++/**
++ * Analyze the info request message sent by the client to see what data it
++ * provided and what it wants to have. The information is gathered in the
++ * "requested_infos" struct. Note that client_id (if provided) points into
++ * the odata region, thus the caller must keep odata valid as long as it
++ * needs to access the requested_infos struct.
++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = 
MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - 
sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000..d12c49b +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE
++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++#ifndef SLIRP_DHCPV6_H
++#define SLIRP_DHCPV6_H
++
++#define DHCPV6_SERVER_PORT 547
++/* Initializer for the IPv6 address ff02::1:2, spelled out byte by byte */
++#define ALLDHCP_MULTICAST \
++    { \
++        .s6_addr = { \
++            0xff, \
++            0x02, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x00, \
++            0x01, \
++            0x00, \
++            0x02 \
++        } \
++    }
++/* Compares address a with ALLDHCP_MULTICAST via in6_equal() */
++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST)
++
++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m);
++
++#endif
+diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c
+new file mode 100644
+index 0000000..e8f14e3
+--- /dev/null
++++ b/slirp/src/dnssearch.c
+@@ -0,0 +1,306 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * Domain search option for DHCP (RFC 3397)
++ *
++ * Copyright (c) 2012 Klaus Stengel
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++ * THE SOFTWARE.
++ */
++
++#include "slirp.h"
++
++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119;
++static const uint8_t MAX_OPT_LEN = 255;
++static const uint8_t OPT_HEADER_LEN = 2;
++static const uint8_t REFERENCE_LEN = 2;
++
++struct compact_domain;
++/* One domain name as length-prefixed labels, plus cross-reference state. */
++typedef struct compact_domain {
++    struct compact_domain *self;
++    struct compact_domain *refdom; /* domain this one refers to, or NULL */
++    uint8_t *labels; /* length-prefixed labels, len octets in total */
++    size_t len;
++    size_t common_octets;
++} CompactDomain;
++/* Count how many trailing octets of a and b are equal (compared backwards). */
++static size_t domain_suffix_diffoff(const CompactDomain *a,
++                                    const CompactDomain *b)
++{
++    size_t la = a->len, lb = b->len;
++    uint8_t *da = a->labels + la, *db = b->labels + lb;
++    size_t i, lm = (la < lb) ? la : lb;
++
++    for (i = 0; i < lm; i++) {
++        da--;
++        db--;
++        if (*da != *db) {
++            break;
++        }
++    }
++    return i;
++}
++/* Comparator: octet at (len - common suffix) first, then length; -1/0/1. */
++static int domain_suffix_ord(const void *cva, const void *cvb)
++{
++    const CompactDomain *a = cva, *b = cvb;
++    size_t la = a->len, lb = b->len;
++    size_t doff = domain_suffix_diffoff(a, b);
++    uint8_t ca = a->labels[la - doff];
++    uint8_t cb = b->labels[lb - doff];
++
++    if (ca < cb) {
++        return -1;
++    }
++    if (ca > cb) {
++        return 1;
++    }
++    if (la < lb) {
++        return -1;
++    }
++    if (la > lb) {
++        return 1;
++    }
++    return 0;
++}
++/* Octets of a's shared suffix with b, cut at a label start; 0 if too short. */
++static size_t domain_common_label(CompactDomain *a, CompactDomain *b)
++{
++    size_t res, doff = domain_suffix_diffoff(a, b);
++    uint8_t *first_eq_pos = a->labels + (a->len - doff);
++    uint8_t *label = a->labels;
++    /* hop label by label until at or past the first shared octet */
++    while (*label && label < first_eq_pos) {
++        label += *label + 1;
++    }
++    res = a->len - (label - a->labels);
++    /* only report if it can help to reduce the packet size */
++    return (res > REFERENCE_LEN) ? res : 0;
++}
++/* NOTE(review): seems to re-link self pointers after a sort -- confirm use */
++static void domain_fixup_order(CompactDomain *cd, size_t n)
++{
++    size_t i;
++
++    for (i = 0; i < n; i++) {
++        CompactDomain *cur = cd + i, *next = cd[i].self;
++
++        while (!cur->common_octets) {
++            CompactDomain *tmp = next->self; /* backup target value */
++
++            next->self = cur;
++            cur->common_octets++;
++
++            cur = next;
++            next = tmp;
++        }
++    }
++}
++/* Encode dotted name input into cd->labels; on error warn and zero cd->len. */
++static void domain_mklabels(CompactDomain *cd, const char *input)
++{
++    uint8_t *len_marker = cd->labels;
++    uint8_t *output = len_marker; /* pre-incremented */
++    const char *in = input;
++    char cur_chr;
++    size_t len = 0;
++
++    if (cd->len == 0) {
++        goto fail;
++    }
++    cd->len++;
++
++    do {
++        cur_chr = *in++;
++        if (cur_chr == '.' || cur_chr == '\0') {
++            len = output - len_marker;
++            if ((len == 0 && cur_chr == '.') || len >= 64) {
++                goto fail;
++            }
++            *len_marker = len;
++
++            output++;
++            len_marker = output;
++        } else {
++            output++;
++            *output = cur_chr;
++        }
++    } while (cur_chr != '\0');
++
++    /* ensure proper zero-termination */
++    if (len != 0) {
++        *len_marker = 0;
++        cd->len++;
++    }
++    return;
++
++fail:
++    g_warning("failed to parse domain name '%s'\n", input);
++    cd->len = 0;
++}
++/* Recursively set refdom/common_octets for the domains doms..last. */
++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last,
++                           size_t depth)
++{
++    CompactDomain *i = doms, *target = doms;
++    /* target: the entry in doms..last with the lowest labels pointer */
++    do {
++        if (i->labels < target->labels) {
++            target = i;
++        }
++    } while (i++ != last);
++
++    for (i = doms; i != last; i++) {
++        CompactDomain *group_last;
++        size_t next_depth;
++
++        if (i->common_octets == depth) {
++            continue;
++        }
++
++        next_depth = -1;
++        for (group_last = i; group_last != last; group_last++) {
++            size_t co = group_last->common_octets;
++            if (co <= depth) {
++                break;
++            }
++            if (co < next_depth) {
++                next_depth = co;
++            }
++        }
++        domain_mkxrefs(i, group_last, next_depth);
++
++        i = group_last;
++        if (i == last) {
++            break;
++        }
++    }
++
++    if (depth == 0) {
++        return;
++    }
++    /* entries without a reference yet now point at target */
++    i = doms;
++    do {
++        if (i != target && i->refdom == NULL) {
++            i->refdom = target;
++            i->common_octets = depth;
++        }
++    } while (i++ != last);
++}
++
++static size_t domain_compactify(CompactDomain *domains, size_t n)
++{
++    uint8_t *start = domains->self->labels, *outptr = start;
++    size_t i;
++
++    for (i = 0; i < n; i++) {
++        CompactDomain *cd = domains[i].self;
++        CompactDomain *rd = cd->refdom;
++
++        if (rd != NULL) {
++            size_t moff = (rd->labels - start) + (rd->len - cd->common_octets);
++            if (moff < 0x3FFFu) {
++                cd->len -= cd->common_octets - 2;
++                cd->labels[cd->len - 1] = moff & 0xFFu;
++                cd->labels[cd->len - 2] = 0xC0u | (moff >> 8);
++            }
++        }
++
++        if (cd->labels != outptr) {
++            memmove(outptr, cd->labels, cd->len);
++            cd->labels = outptr;
++        }
++        outptr
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; ++ 
bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000..23190b5 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,213 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == &slirp->if_batchq) { ++ 
/* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000..7cf9d27 +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000..e5d4aa8 +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options. 
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000..0630309 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include ++#include ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int 
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(ð[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998. 
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000..28ec2be +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,434 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = 
NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Prefix information (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy. 
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solitication ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ 
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= 
ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ Slirp *slirp = m->slirp; ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000..c37e60f +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,219 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Interface Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Network Discover Protocol Definitions. ++ * Per RFC 4861, September 2007. 
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000..dfcbfd6 +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,78 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ struct ip6 *ip6; ++ Slirp *slirp = m->slirp; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ /* Drop wrong-version packets and packets shorter than ip_pl claims */ ++ if (ip6->ip_v != IP6VERSION || m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000..b861106 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output.
The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ if_encap(m->slirp, m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000..fe0add4 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,489 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void icmp_cleanup(Slirp *slirp) ++{ ++ while (slirp->icmp.so_next != &slirp->icmp) { ++ icmp_detach(slirp->icmp.so_next); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; 
++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ ++ if (slirp_bind_outbound(so, AF_INET) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ Slirp *slirp = m->slirp; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length. 
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ return; ++ } ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? 
*/ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet.
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ ++ /* check msrc before anything below dereferences it */ ++ if (!msrc) ++ goto end_error; ++ DEBUG_ARG("msrc_len = %d", msrc->m_len); ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ strcpy(bufa, inet_ntoa(ip->ip_src)); ++ strcpy(bufb, inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs */ ++ if ((ip->ip_src.s_addr & htonl(~(0xfU << 28))) == 0) { ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it..
++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = m->slirp->vhost_addr; ++ ++ 
(void)ip_output((struct socket *)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ (void)ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. 
It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= (int)sizeof(struct ip)) { /* signed compare: recv() may return -1 */ ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = EINVAL; /* errno values are positive */ ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else if (len >= 0) { /* short datagram; keep recv()'s errno when len == -1 */ ++ len = -1; ++ errno = EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000..84707db +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,166 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Interface Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header. 
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000..89a01d4 +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,461 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. ++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * Ip input routine. 
Checksum and byte swap header. If fragmented ++ * try to reassemble. Process options. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is as at least much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. ++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. 
++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * set ip_mff if more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. 
++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. 
++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ prev = q; ++ q = q->ipf_next; ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's bigger than the total ++ * size of the fragment and the mbuf was not already using an m_ext buffer, ++ * then an m_ext buffer was alloced. But fp->ipq_next points to the old ++ * buffer (in the mbuf), so we must point ip into the new buffer. 
++ */ ++ if (m->m_flags & M_EXT) { ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. ++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. 
++ * Second argument is buffer to which options ++ * will be moved, and return value is their length. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memcpy(opts, opts + olen, (unsigned)i); ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000..22916a3 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. ++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. 
++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000..faa6c85 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000..fb4c7e8 +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct Slirp Slirp; ++ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ 
SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque ++ * parameter is the one given to slirp_init(). The function ++ * doesn't need to send all the data and may return m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ while ((struct quehead *)m != &slirp->m_usedlist) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ while ((struct quehead *)m != &slirp->m_freelist) { ++ next = m->m_next; ++ g_free(m); ++ m = next; ++ } ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* 
Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000..546e785 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,127 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. 
++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on 
the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *dtom(Slirp *, void *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000..e6bc0a2 +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,390 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct 
gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). 
++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, 
f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++/* ++ * Run the command line `ex` as a child process whose stdin, stdout and stderr ++ * are all one end of a local socket pair; the other end becomes so->s ++ * (fast-reuse, SO_OOBINLINE, non-blocking) and is handed to register_poll_fd. ++ * ++ * Returns 1 on success and 0 on failure. Both ends of the socket pair are ++ * closed on every failure path taken after the pair has been created. ++ */ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ /* the socket pair already exists: close it rather than leak it */ ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ /* sp[1] was given to the child as its stdio; the parent keeps only sp[0] */ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ 
slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +\ No newline at end of file +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000..81b370c +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000..7795ad8 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++}; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++}; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++}; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++}; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++}; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Select Package */ ++struct ncsi_cmd_sp_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char hw_arbitration; /* HW arbitration */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Disable Channel */ ++struct ncsi_cmd_dc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char ald; /* Allow link down */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Reset Channel */ ++struct ncsi_cmd_rc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 reserved; 
/* Reserved */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN Enable */ ++struct ncsi_cmd_ae_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mc_id; /* MC ID */ ++ __be32 mode; /* AEN working mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set Link */ ++struct ncsi_cmd_sl_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Link working mode */ ++ __be32 oem_mode; /* OEM link mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set VLAN Filter */ ++struct ncsi_cmd_svf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be16 reserved; /* Reserved */ ++ __be16 vlan; /* VLAN ID */ ++ __be16 reserved1; /* Reserved */ ++ unsigned char index; /* VLAN table index */ ++ unsigned char enable; /* Enable or disable */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* Enable VLAN */ ++struct ncsi_cmd_ev_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* VLAN filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Set MAC Address */ ++struct ncsi_cmd_sma_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char mac[6]; /* MAC address */ ++ unsigned char index; /* MAC table index */ ++ unsigned char at_e; /* Addr type and operation */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Enable Broadcast Filter */ ++struct ncsi_cmd_ebf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Enable Global Multicast Filter */ ++struct ncsi_cmd_egmf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Global MC mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; 
++}; ++ ++/* Set NCSI Flow Control */ ++struct ncsi_cmd_snfc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* Flow control mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Get Link Status */ ++struct ncsi_rsp_gls_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Link status */ ++ __be32 other; /* Other indications */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; ++ unsigned char pad[10]; ++}; ++ ++/* Get Version ID */ ++struct ncsi_rsp_gvi_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 ncsi_version; /* NCSI version */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char alpha2; /* NCSI version */ ++ unsigned char fw_name[12]; /* f/w name string */ ++ __be32 fw_version; /* f/w version */ ++ __be16 pci_ids[4]; /* PCI IDs */ ++ __be32 mf_id; /* Manufacture ID */ ++ __be32 checksum; ++}; ++ ++/* Get Capabilities */ ++struct ncsi_rsp_gc_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cap; /* Capabilities */ ++ __be32 bc_cap; /* Broadcast cap */ ++ __be32 mc_cap; /* Multicast cap */ ++ __be32 buf_cap; /* Buffering cap */ ++ __be32 aen_cap; /* AEN cap */ ++ unsigned char vlan_cnt; /* VLAN filter count */ ++ unsigned char mixed_cnt; /* Mix filter count */ ++ unsigned char mc_cnt; /* MC filter count */ ++ unsigned char uc_cnt; /* UC filter count */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char channel_cnt; /* Channel count */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Parameters */ ++struct ncsi_rsp_gp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char mac_cnt; /* Number of MAC addr */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char mac_enable; /* MAC addr enable flags */ ++ unsigned char vlan_cnt; /* VLAN tag count */ ++ unsigned char reserved1; /* 
Reserved */ ++ __be16 vlan_enable; /* VLAN tag enable flags */ ++ __be32 link_mode; /* Link setting */ ++ __be32 bc_mode; /* BC filter mode */ ++ __be32 valid_modes; /* Valid mode parameters */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char fc_mode; /* Flow control mode */ ++ unsigned char reserved2[2]; /* Reserved */ ++ __be32 aen_mode; /* AEN mode */ ++ unsigned char mac[6]; /* Supported MAC addr */ ++ __be16 vlan; /* Supported VLAN tags */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Controller Packet Statistics */ ++struct ncsi_rsp_gcps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cnt_hi; /* Counter cleared */ ++ __be32 cnt_lo; /* Counter cleared */ ++ __be32 rx_bytes; /* Rx bytes */ ++ __be32 tx_bytes; /* Tx bytes */ ++ __be32 rx_uc_pkts; /* Rx UC packets */ ++ __be32 rx_mc_pkts; /* Rx MC packets */ ++ __be32 rx_bc_pkts; /* Rx BC packets */ ++ __be32 tx_uc_pkts; /* Tx UC packets */ ++ __be32 tx_mc_pkts; /* Tx MC packets */ ++ __be32 tx_bc_pkts; /* Tx BC packets */ ++ __be32 fcs_err; /* FCS errors */ ++ __be32 align_err; /* Alignment errors */ ++ __be32 false_carrier; /* False carrier detection */ ++ __be32 runt_pkts; /* Rx runt packets */ ++ __be32 jabber_pkts; /* Rx jabber packets */ ++ __be32 rx_pause_xon; /* Rx pause XON frames */ ++ __be32 rx_pause_xoff; /* Rx XOFF frames */ ++ __be32 tx_pause_xon; /* Tx XON frames */ ++ __be32 tx_pause_xoff; /* Tx XOFF frames */ ++ __be32 tx_s_collision; /* Single collision frames */ ++ __be32 tx_m_collision; /* Multiple collision frames */ ++ __be32 l_collision; /* Late collision frames */ ++ __be32 e_collision; /* Excessive collision frames */ ++ __be32 rx_ctl_frames; /* Rx control frames */ ++ __be32 rx_64_frames; /* Rx 64-bytes frames */ ++ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ ++ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ ++ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ ++ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ ++ __be32 
rx_1522_frames; /* Rx 1024-1522 bytes frames */ ++ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ ++ __be32 tx_64_frames; /* Tx 64-bytes frames */ ++ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ ++ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ ++ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ ++ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ ++ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ ++ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ ++ __be32 rx_valid_bytes; /* Rx valid bytes */ ++ __be32 rx_runt_pkts; /* Rx error runt packets */ ++ __be32 rx_jabber_pkts; /* Rx error jabber packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Statistics */ ++struct ncsi_rsp_gns_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 rx_cmds; /* Rx NCSI commands */ ++ __be32 dropped_cmds; /* Dropped commands */ ++ __be32 cmd_type_errs; /* Command type errors */ ++ __be32 cmd_csum_errs; /* Command checksum errors */ ++ __be32 rx_pkts; /* Rx NCSI packets */ ++ __be32 tx_pkts; /* Tx NCSI packets */ ++ __be32 tx_aen_pkts; /* Tx AEN packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Pass-through Statistics */ ++struct ncsi_rsp_gnpts_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 tx_pkts; /* Tx packets */ ++ __be32 tx_dropped; /* Tx dropped packets */ ++ __be32 tx_channel_err; /* Tx channel errors */ ++ __be32 tx_us_err; /* Tx undersize errors */ ++ __be32 rx_pkts; /* Rx packets */ ++ __be32 rx_dropped; /* Rx dropped packets */ ++ __be32 rx_channel_err; /* Rx channel errors */ ++ __be32 rx_us_err; /* Rx undersize errors */ ++ __be32 rx_os_err; /* Rx oversize errors */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get package status */ ++struct ncsi_rsp_gps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Hardware arbitration status */ ++ __be32 checksum; ++}; ++ ++/* Get package UUID */ ++struct ncsi_rsp_gpuuid_pkt { ++ struct 
ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char uuid[16]; /* UUID */ ++ __be32 checksum; ++}; ++ ++/* AEN: Link State Change */ ++struct ncsi_aen_lsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Link status */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* AEN: Configuration Required */ ++struct ncsi_aen_cr_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN: Host Network Controller Driver Status Change */ ++struct ncsi_aen_hncdsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* NCSI packet revision */ ++#define NCSI_PKT_REVISION 0x01 ++ ++/* NCSI packet commands */ ++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ ++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ ++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ ++#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ ++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ ++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ ++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ ++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ ++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ ++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ ++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ ++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ ++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ ++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ ++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ ++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ ++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ ++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ ++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ ++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow 
Control */ ++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ ++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ ++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ ++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ ++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ ++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ ++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ ++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ ++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ ++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ ++ ++/* NCSI packet responses */ ++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) ++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) ++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) ++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) ++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) ++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) ++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) ++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) ++#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) ++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) ++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) ++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) ++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) ++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) ++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) ++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) ++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) ++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) ++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) ++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) ++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) ++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) ++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) ++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) ++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) ++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 
0x80) ++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) ++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) ++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) ++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) ++ ++/* NCSI response code/reason */ ++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ ++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ ++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ ++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ ++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ ++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ ++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ ++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ ++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ ++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ ++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ ++ ++/* NCSI AEN packet type */ ++#define NCSI_PKT_AEN 0xFF /* AEN Packet */ ++#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ ++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ ++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ ++ ++#endif /* NCSI_PKT_H */ +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +new file mode 100644 +index 0000000..ddd980d +--- /dev/null ++++ b/slirp/src/ncsi.c +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * NC-SI (Network Controller Sideband Interface) "echo" model ++ * ++ * Copyright (C) 2016-2018 IBM Corp. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include "slirp.h" ++ ++#include "ncsi-pkt.h" ++ ++static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) ++{ ++ uint32_t checksum = 0; ++ int i; ++ ++ /* ++ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet ++ * payload interpreted as a series of 16-bit unsigned integer values. 
++ */ ++ for (i = 0; i < len / 2; i++) { ++ checksum += htons(data[i]); ++ } ++ ++ checksum = (~checksum + 1); ++ return checksum; ++} ++ ++/* Get Capabilities */ ++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; ++ ++ rsp->cap = htonl(~0); ++ rsp->bc_cap = htonl(~0); ++ rsp->mc_cap = htonl(~0); ++ rsp->buf_cap = htonl(~0); ++ rsp->aen_cap = htonl(~0); ++ rsp->vlan_mode = 0xff; ++ rsp->uc_cnt = 2; ++ return 0; ++} ++ ++/* Get Link status */ ++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; ++ ++ rsp->status = htonl(0x1); ++ return 0; ++} ++ ++/* Get Parameters */ ++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; ++ ++ /* no MAC address filters or VLAN filters on the channel */ ++ rsp->mac_cnt = 0; ++ rsp->mac_enable = 0; ++ rsp->vlan_cnt = 0; ++ rsp->vlan_enable = 0; ++ ++ return 0; ++} ++ ++static const struct ncsi_rsp_handler { ++ unsigned char type; ++ int payload; ++ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); ++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, ++ { NCSI_PKT_RSP_SP, 4, NULL }, ++ { NCSI_PKT_RSP_DP, 4, NULL }, ++ { NCSI_PKT_RSP_EC, 4, NULL }, ++ { NCSI_PKT_RSP_DC, 4, NULL }, ++ { NCSI_PKT_RSP_RC, 4, NULL }, ++ { NCSI_PKT_RSP_ECNT, 4, NULL }, ++ { NCSI_PKT_RSP_DCNT, 4, NULL }, ++ { NCSI_PKT_RSP_AE, 4, NULL }, ++ { NCSI_PKT_RSP_SL, 4, NULL }, ++ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, ++ { NCSI_PKT_RSP_SVF, 4, NULL }, ++ { NCSI_PKT_RSP_EV, 4, NULL }, ++ { NCSI_PKT_RSP_DV, 4, NULL }, ++ { NCSI_PKT_RSP_SMA, 4, NULL }, ++ { NCSI_PKT_RSP_EBF, 4, NULL }, ++ { NCSI_PKT_RSP_DBF, 4, NULL }, ++ { NCSI_PKT_RSP_EGMF, 4, NULL }, ++ { NCSI_PKT_RSP_DGMF, 4, NULL }, ++ { NCSI_PKT_RSP_SNFC, 4, NULL }, ++ { NCSI_PKT_RSP_GVI, 40, NULL }, ++ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, ++ { NCSI_PKT_RSP_GP, 
40, ncsi_rsp_handler_gp }, ++ { NCSI_PKT_RSP_GCPS, 172, NULL }, ++ { NCSI_PKT_RSP_GNS, 172, NULL }, ++ { NCSI_PKT_RSP_GNPTS, 172, NULL }, ++ { NCSI_PKT_RSP_GPS, 8, NULL }, ++ { NCSI_PKT_RSP_OEM, 0, NULL }, ++ { NCSI_PKT_RSP_PLDM, 0, NULL }, ++ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; ++ ++/* ++ * packet format : ncsi header + payload + checksum ++ */ ++#define NCSI_MAX_PAYLOAD 172 ++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) ++ ++/* ++ * Reply to the NC-SI command frame in pkt (an Ethernet header followed by an ++ * NC-SI header). The response type is the command type + 0x80; its payload ++ * length and optional filler callback are looked up in ncsi_rsp_handlers[]. ++ * The reply is built in a local buffer, a checksum is appended, and the frame ++ * is sent with slirp_send_packet_all(). ++ * ++ * Frames too short to contain both headers are dropped without a reply. ++ */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; ++ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; ++ struct ncsi_rsp_pkt_hdr *rnh = ++ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); ++ const struct ncsi_rsp_handler *handler = NULL; ++ int i; ++ int ncsi_rsp_len = sizeof(*nh); ++ uint32_t checksum; ++ uint32_t *pchecksum; ++ ++ /* nh is read below: never look past the end of a truncated frame */ ++ if (pkt_len < (int)(ETH_HLEN + sizeof(*nh))) { ++ return; ++ } ++ ++ memset(ncsi_reply, 0, sizeof(ncsi_reply)); ++ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memset(reh->h_source, 0xff, ETH_ALEN); ++ reh->h_proto = htons(ETH_P_NCSI); ++ ++ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { ++ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { ++ handler = &ncsi_rsp_handlers[i]; ++ break; ++ } ++ } ++ ++ rnh->common.mc_id = nh->mc_id; ++ rnh->common.revision = NCSI_PKT_REVISION; ++ rnh->common.id = nh->id; ++ rnh->common.type = nh->type + 0x80; ++ rnh->common.channel = nh->channel; ++ ++ if (handler) { ++ rnh->common.length = htons(handler->payload); ++ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); ++ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); ++ ++ if (handler->handler) { ++ /* TODO: handle errors */ ++ handler->handler(rnh); ++ } ++ ncsi_rsp_len += handler->payload; ++ } else { ++ rnh->common.length = 0; ++ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); ++ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); ++ } ++ ++ /* Add the optional checksum at the end of the frame. 
*/ ++ checksum = ncsi_calculate_checksum((uint16_t *)rnh, ncsi_rsp_len); ++ pchecksum = (uint32_t *)((uint8_t *)rnh + ncsi_rsp_len); ++ *pchecksum = htonl(checksum); ++ ncsi_rsp_len += 4; ++ ++ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); ++} +diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c +new file mode 100644 +index 0000000..110d6ea +--- /dev/null ++++ b/slirp/src/ndp_table.c +@@ -0,0 +1,87 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_add"); ++ DEBUG_ARG("ip = %s", addrstr); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { ++ /* Do not register multicast or unspecified addresses */ ++ DEBUG_CALL(" abort: do not register multicast or unspecified address"); ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ DEBUG_CALL(" already in table: update the entry"); ++ /* Update the entry */ ++ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ DEBUG_CALL(" create new entry"); ++ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; ++ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, ++ ETH_ALEN); ++ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; ++} ++ ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ char 
addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_search"); ++ DEBUG_ARG("ip = %s", addrstr); ++ ++ assert(!in6_zero(&ip_addr)); ++ ++ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { ++ out_ethaddr[0] = 0x33; ++ out_ethaddr[1] = 0x33; ++ out_ethaddr[2] = ip_addr.s6_addr[12]; ++ out_ethaddr[3] = ip_addr.s6_addr[13]; ++ out_ethaddr[4] = ip_addr.s6_addr[14]; ++ out_ethaddr[5] = ip_addr.s6_addr[15]; ++ DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ DEBUG_CALL(" ip not found in table"); ++ return 0; ++} +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +new file mode 100644 +index 0000000..2fb9176 +--- /dev/null ++++ b/slirp/src/sbuf.c +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */
++
++#include "slirp.h"
++
++static void sbappendsb(struct sbuf *sb, struct mbuf *m);
++
++/* Release the storage behind sb (the struct itself is not freed). */
++void sbfree(struct sbuf *sb)
++{
++    g_free(sb->sb_data);
++}
++
++/*
++ * Discard num bytes from the read side of sb (clamped to sb_cc, with a
++ * GLib warning when the caller asked for more).
++ *
++ * Returns true when this call moved the fill level from at-or-above half of
++ * sb_datalen to below it, false otherwise.
++ */
++bool sbdrop(struct sbuf *sb, size_t num)
++{
++    int limit = sb->sb_datalen / 2;
++
++    g_warn_if_fail(num <= sb->sb_cc);
++    if (num > sb->sb_cc)
++        num = sb->sb_cc;
++
++    sb->sb_cc -= num;
++    sb->sb_rptr += num;
++    /* wrap the read pointer around the end of the ring */
++    if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen)
++        sb->sb_rptr -= sb->sb_datalen;
++
++    if (sb->sb_cc < limit && sb->sb_cc + num >= limit) {
++        return true;
++    }
++
++    return false;
++}
++
++/*
++ * (Re)allocate the buffer to size bytes and reset it to empty: both
++ * pointers go back to the start and sb_cc becomes 0.
++ */
++void sbreserve(struct sbuf *sb, size_t size)
++{
++    sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size);
++    sb->sb_cc = 0;
++    sb->sb_datalen = size;
++}
++
++/*
++ * Try and write() to the socket, whatever doesn't get written
++ * append to the buffer... for a host with a fast net connection,
++ * this prevents an unnecessary copy of the data
++ * (the socket is non-blocking, so we won't hang)
++ *
++ * The mbuf m is always freed before returning.
++ */
++void sbappend(struct socket *so, struct mbuf *m)
++{
++    int ret = 0;
++
++    DEBUG_CALL("sbappend");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("m->m_len = %d", m->m_len);
++
++    /* Shouldn't happen, but... e.g. foreign host closes connection */
++    if (m->m_len <= 0) {
++        m_free(m);
++        return;
++    }
++
++    /*
++     * If there is urgent data, call sosendoob
++     * if not all was sent, sowrite will take care of the rest
++     * (The rest of this function is just an optimisation)
++     */
++    if (so->so_urgc) {
++        sbappendsb(&so->so_rcv, m);
++        m_free(m);
++        (void)sosendoob(so);
++        return;
++    }
++
++    /*
++     * We only write if there's nothing in the buffer,
++     * otherwise it'll arrive out of order, and hence corrupt
++     */
++    if (!so->so_rcv.sb_cc)
++        ret = slirp_send(so, m->m_data, m->m_len, 0);
++
++    if (ret <= 0) {
++        /*
++         * Nothing was written
++         * It's possible that the socket has closed, but
++         * we don't need to check because if it has closed,
++         * it will be detected in the normal way by soread()
++         */
++        sbappendsb(&so->so_rcv, m);
++    } else if (ret != m->m_len) {
++        /*
++         * Something was written, but not everything..
++         * sbappendsb the rest
++         */
++        m->m_len -= ret;
++        m->m_data += ret;
++        sbappendsb(&so->so_rcv, m);
++    } /* else */
++    /* Whatever happened, we free the mbuf */
++    m_free(m);
++}
++
++/*
++ * Copy the data from m into sb
++ * The caller is responsible to make sure there's enough room
++ *
++ * Only as many bytes as fit between the write and read pointers are
++ * copied; sb_cc and sb_wptr advance by the number actually copied.
++ */
++static void sbappendsb(struct sbuf *sb, struct mbuf *m)
++{
++    int len, n, nn;
++
++    len = m->m_len;
++
++    if (sb->sb_wptr < sb->sb_rptr) {
++        /* free space is one contiguous run up to the read pointer */
++        n = sb->sb_rptr - sb->sb_wptr;
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++    } else {
++        /* Do the right edge first */
++        n = sb->sb_data + sb->sb_datalen - sb->sb_wptr;
++        if (n > len)
++            n = len;
++        memcpy(sb->sb_wptr, m->m_data, n);
++        len -= n;
++        if (len) {
++            /* Now the left edge */
++            nn = sb->sb_rptr - sb->sb_data;
++            if (nn > len)
++                nn = len;
++            memcpy(sb->sb_data, m->m_data + n, nn);
++            n += nn;
++        }
++    }
++
++    sb->sb_cc += n;
++    sb->sb_wptr += n;
++    /* wrap the write pointer around the end of the ring */
++    if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen)
++        sb->sb_wptr -= sb->sb_datalen;
++}
++
++/*
++ * Copy data from sbuf to a normal, straight buffer
++ * Don't update the sbuf rptr, this will be
++ * done in sbdrop when the data is acked
++ *
++ * off + len must not exceed sb_cc (asserted).
++ */
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to)
++{
++    char *from;
++
++    g_assert(len + off <= sb->sb_cc);
++
++    from = sb->sb_rptr + off;
++    if (from >= sb->sb_data + sb->sb_datalen)
++        from -= sb->sb_datalen;
++
++    if (from < sb->sb_wptr) {
++        /* requested range does not cross the end of the ring */
++        memcpy(to, from, len);
++    } else {
++        /* re-use off */
++        off = (sb->sb_data + sb->sb_datalen) - from;
++        if (off > len)
++            off = len;
++        memcpy(to, from, off);
++        len -= off;
++        if (len)
++            memcpy(to + off, sb->sb_data, len);
++    }
++}
+diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h
+new file mode 100644
+index 0000000..01886fb
+--- /dev/null
++++ b/slirp/src/sbuf.h
+@@ -0,0 +1,27 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#ifndef SBUF_H
++#define SBUF_H
++
++/* number of bytes that can still be stored in sb */
++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc)
++
++struct sbuf {
++    uint32_t sb_cc; /* actual chars in buffer */
++    uint32_t sb_datalen; /* Length of data */
++    char *sb_wptr; /* write pointer. points to where the next
++                    * bytes should be written in the sbuf */
++    char *sb_rptr; /* read pointer. points to where the next
++                    * byte should be read from the sbuf */
++    char *sb_data; /* Actual data */
++};
++
++void sbfree(struct sbuf *sb);
++bool sbdrop(struct sbuf *sb, size_t len);
++void sbreserve(struct sbuf *sb, size_t size);
++void sbappend(struct socket *sb, struct mbuf *mb);
++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p);
++
++#endif
+diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c
+new file mode 100644
+index 0000000..14458e8
+--- /dev/null
++++ b/slirp/src/slirp.c
+@@ -0,0 +1,1185 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * libslirp glue
++ *
++ * Copyright (c) 2004-2008 Fabrice Bellard
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++ * THE SOFTWARE.
++ */
++#include "slirp.h"
++
++
++#ifndef _WIN32
++#include <net/if.h> /* if_nametoindex() */
++#endif
++
++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */
++#if defined(__NetBSD__) && defined(if_mtu)
++#undef if_mtu
++#endif
++
++int slirp_debug;
++
++/* Define to 1 if you want KEEPALIVE timers */
++bool slirp_do_keepalive;
++
++/* host loopback address */
++struct in_addr loopback_addr;
++/* host loopback network mask */
++unsigned long loopback_mask;
++
++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */
++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00,
++                                                   0x00, 0x00, 0x00 };
++
++unsigned curtime;
++
++/* cached host DNS server addresses and the curtime they were read at */
++static struct in_addr dns_addr;
++#ifndef _WIN32
++static struct in6_addr dns6_addr;
++#endif
++static unsigned dns_addr_time;
++#ifndef _WIN32
++static unsigned dns6_addr_time;
++#endif
++
++#define TIMEOUT_FAST 2 /* milliseconds */
++#define TIMEOUT_SLOW 499 /* milliseconds */
++/* for the aging of certain requests like DNS */
++#define TIMEOUT_DEFAULT 1000 /* milliseconds */
++
++#ifdef _WIN32
++
++/*
++ * Windows: fetch the first DNS server reported by GetNetworkParams().
++ * The result is cached in dns_addr and reused while it is younger than
++ * TIMEOUT_DEFAULT. Returns 0 on success, -1 when GetNetworkParams() fails.
++ */
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    FIXED_INFO *FixedInfo = NULL;
++    ULONG BufLen;
++    DWORD ret;
++    IP_ADDR_STRING *pIPAddr;
++    struct in_addr tmp_addr;
++
++    if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) {
++        *pdns_addr = dns_addr;
++        return 0;
++    }
++
++    FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO));
++    BufLen = sizeof(FIXED_INFO);
++
++    /* first call only probes for the required size */
++    if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) {
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        FixedInfo = GlobalAlloc(GPTR, BufLen);
++    }
++
++    if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) {
++        printf("GetNetworkParams failed. ret = %08x\n", (unsigned)ret);
++        if (FixedInfo) {
++            GlobalFree(FixedInfo);
++            FixedInfo = NULL;
++        }
++        return -1;
++    }
++
++    pIPAddr = &(FixedInfo->DnsServerList);
++    inet_aton(pIPAddr->IpAddress.String, &tmp_addr);
++    *pdns_addr = tmp_addr;
++    dns_addr = tmp_addr;
++    dns_addr_time = curtime;
++    if (FixedInfo) {
++        GlobalFree(FixedInfo);
++        FixedInfo = NULL;
++    }
++    return 0;
++}
++
++/* Windows: IPv6 DNS lookup is not implemented, always fails with -1. */
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    return -1;
++}
++
++/* atexit() hook registered by slirp_init_once() */
++static void winsock_cleanup(void)
++{
++    WSACleanup();
++}
++
++#else
++
++/*
++ * Decide whether the cached DNS address can be reused.
++ *
++ * Returns 0 and copies cached_addr to pdns_addr when the cache is younger
++ * than TIMEOUT_DEFAULT or /etc/resolv.conf still has the same device,
++ * inode, size and mtime as recorded in cached_stat; returns -1 when stat()
++ * fails; returns 1 when the file changed and has to be parsed again.
++ * cached_stat is updated with the fresh stat() result.
++ */
++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr,
++                               socklen_t addrlen, struct stat *cached_stat,
++                               unsigned *cached_time)
++{
++    struct stat old_stat;
++    if (curtime - *cached_time < TIMEOUT_DEFAULT) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    old_stat = *cached_stat;
++    if (stat("/etc/resolv.conf", cached_stat) != 0) {
++        return -1;
++    }
++    if (cached_stat->st_dev == old_stat.st_dev &&
++        cached_stat->st_ino == old_stat.st_ino &&
++        cached_stat->st_size == old_stat.st_size &&
++        cached_stat->st_mtime == old_stat.st_mtime) {
++        memcpy(pdns_addr, cached_addr, addrlen);
++        return 0;
++    }
++    return 1;
++}
++
++/*
++ * Parse /etc/resolv.conf for "nameserver" lines of address family af.
++ *
++ * The first address that inet_pton() accepts is stored in pdns_addr and
++ * cached_addr, its "%ifname" suffix (if any) is converted with
++ * if_nametoindex() into *scope_id, and *cached_time is set to curtime.
++ * Further entries are only logged. Returns 0 when at least one address
++ * was found, -1 otherwise (including when the file cannot be opened).
++ */
++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr,
++                                    socklen_t addrlen, uint32_t *scope_id,
++                                    unsigned *cached_time)
++{
++    char buff[512];
++    char buff2[257];
++    FILE *f;
++    int found = 0;
++    void *tmp_addr = alloca(addrlen);
++    unsigned if_index;
++
++    f = fopen("/etc/resolv.conf", "r");
++    if (!f)
++        return -1;
++
++    DEBUG_MISC("IP address of your DNS(s):");
++    while (fgets(buff, 512, f) != NULL) {
++        if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) {
++            /* split off an optional "%interface" zone suffix */
++            char *c = strchr(buff2, '%');
++            if (c) {
++                if_index = if_nametoindex(c + 1);
++                *c = '\0';
++            } else {
++                if_index = 0;
++            }
++
++            if (!inet_pton(af, buff2, tmp_addr)) {
++                continue;
++            }
++            /* If it's the first one, set it to dns_addr */
++            if (!found) {
++                memcpy(pdns_addr, tmp_addr, addrlen);
++                memcpy(cached_addr, tmp_addr, addrlen);
++                if (scope_id) {
++                    *scope_id = if_index;
++                }
++                *cached_time = curtime;
++            }
++
++            if (++found > 3) {
++                DEBUG_MISC(" (more)");
++                break;
++            } else if (slirp_debug & DBG_MISC) {
++                char s[INET6_ADDRSTRLEN];
++                const char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
++                if (!res) {
++                    res = " (string conversion error)";
++                }
++                DEBUG_MISC(" %s", res);
++            }
++        }
++    }
++    fclose(f);
++    if (!found)
++        return -1;
++    return 0;
++}
++
++/*
++ * Return the host's IPv4 DNS server, using the cache when it is still
++ * valid and re-reading /etc/resolv.conf otherwise. 0 on success, -1 on
++ * failure.
++ */
++int get_dns_addr(struct in_addr *pdns_addr)
++{
++    static struct stat dns_addr_stat;
++
++    if (dns_addr.s_addr != 0) {
++        int ret;
++        ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr),
++                                  &dns_addr_stat, &dns_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr,
++                                    sizeof(dns_addr), NULL, &dns_addr_time);
++}
++
++/* IPv6 counterpart of get_dns_addr(); also reports the scope id. */
++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id)
++{
++    static struct stat dns6_addr_stat;
++
++    if (!in6_zero(&dns6_addr)) {
++        int ret;
++        ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr),
++                                  &dns6_addr_stat, &dns6_addr_time);
++        if (ret <= 0) {
++            return ret;
++        }
++    }
++    return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr,
++                                    sizeof(dns6_addr), scope_id,
++                                    &dns6_addr_time);
++}
++
++#endif
++
++/*
++ * Process-wide initialisation, performed on the first call only: starts
++ * Winsock on Windows, fills in loopback_addr/loopback_mask and parses the
++ * SLIRP_DEBUG environment variable into slirp_debug.
++ */
++static void slirp_init_once(void)
++{
++    static int initialized;
++    const char *debug;
++#ifdef _WIN32
++    WSADATA Data;
++#endif
++
++    if (initialized) {
++        return;
++    }
++    initialized = 1;
++
++#ifdef _WIN32
++    WSAStartup(MAKEWORD(2, 0), &Data);
++    atexit(winsock_cleanup);
++#endif
++
++    loopback_addr.s_addr = htonl(INADDR_LOOPBACK);
++    loopback_mask = htonl(IN_CLASSA_NET);
++
++    debug = g_getenv("SLIRP_DEBUG");
++    if (debug) {
++        const GDebugKey keys[] = {
++            { "call", DBG_CALL },
++            { "misc", DBG_MISC },
++            { "error", DBG_ERROR },
++            { "tftp", DBG_TFTP },
++        };
++        slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys));
++    }
++}
++
++/*
++ * Create a new Slirp instance from cfg.
++ *
++ * Returns NULL (through g_return_val_if_fail) when cfg is NULL, its
++ * version, MTU or MRU are out of range, or bootfile does not fit in
++ * bootp_t.bp_file. An if_mtu/if_mru of 0 selects the default. String
++ * settings are duplicated, so the caller keeps ownership of cfg. The
++ * outbound addresses are only taken from cfg when cfg->version >= 2.
++ */
++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque)
++{
++    Slirp *slirp;
++
++    g_return_val_if_fail(cfg != NULL, NULL);
++    g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL);
++    g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL);
++    g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL);
++    g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL);
++    g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL);
++    g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL);
++    g_return_val_if_fail(!cfg->bootfile ||
++                         (strlen(cfg->bootfile) <
++                          G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL);
++
++    slirp = g_malloc0(sizeof(Slirp));
++
++    slirp_init_once();
++
++    slirp->opaque = opaque;
++    slirp->cb = callbacks;
++    slirp->grand = g_rand_new();
++    slirp->restricted = cfg->restricted;
++
++    slirp->in_enabled = cfg->in_enabled;
++    slirp->in6_enabled = cfg->in6_enabled;
++
++    if_init(slirp);
++    ip_init(slirp);
++    ip6_init(slirp);
++
++    m_init(slirp);
++
++    slirp->vnetwork_addr = cfg->vnetwork;
++    slirp->vnetwork_mask = cfg->vnetmask;
++    slirp->vhost_addr = cfg->vhost;
++    slirp->vprefix_addr6 = cfg->vprefix_addr6;
++    slirp->vprefix_len = cfg->vprefix_len;
++    slirp->vhost_addr6 = cfg->vhost6;
++    if (cfg->vhostname) {
++        slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname),
++                      cfg->vhostname);
++    }
++    slirp->tftp_prefix = g_strdup(cfg->tftp_path);
++    slirp->bootp_filename = g_strdup(cfg->bootfile);
++    slirp->vdomainname = g_strdup(cfg->vdomainname);
++    slirp->vdhcp_startaddr = cfg->vdhcp_start;
++    slirp->vnameserver_addr = cfg->vnameserver;
++    slirp->vnameserver_addr6 = cfg->vnameserver6;
++    slirp->tftp_server_name = g_strdup(cfg->tftp_server_name);
++
++    if (cfg->vdnssearch) {
++        translate_dnssearch(slirp, cfg->vdnssearch);
++    }
++    slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu;
++    slirp->if_mru = cfg->if_mru == 0 ? IF_MRU_DEFAULT : cfg->if_mru;
++    slirp->disable_host_loopback = cfg->disable_host_loopback;
++    slirp->enable_emu = cfg->enable_emu;
++
++    if (cfg->version >= 2) {
++        slirp->outbound_addr = cfg->outbound_addr;
++        slirp->outbound_addr6 = cfg->outbound_addr6;
++    } else {
++        slirp->outbound_addr = NULL;
++        slirp->outbound_addr6 = NULL;
++    }
++    return slirp;
++}
++
++/*
++ * Legacy constructor: packs its arguments into a version 1 SlirpConfig
++ * (all other fields zeroed) and calls slirp_new().
++ */
++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork,
++                  struct in_addr vnetmask, struct in_addr vhost,
++                  bool in6_enabled, struct in6_addr vprefix_addr6,
++                  uint8_t vprefix_len, struct in6_addr vhost6,
++                  const char *vhostname, const char *tftp_server_name,
++                  const char *tftp_path, const char *bootfile,
++                  struct in_addr vdhcp_start, struct in_addr vnameserver,
++                  struct in6_addr vnameserver6, const char **vdnssearch,
++                  const char *vdomainname, const SlirpCb *callbacks,
++                  void *opaque)
++{
++    SlirpConfig cfg;
++    memset(&cfg, 0, sizeof(cfg));
++    cfg.version = 1;
++    cfg.restricted = restricted;
++    cfg.in_enabled = in_enabled;
++    cfg.vnetwork = vnetwork;
++    cfg.vnetmask = vnetmask;
++    cfg.vhost = vhost;
++    cfg.in6_enabled = in6_enabled;
++    cfg.vprefix_addr6 = vprefix_addr6;
++    cfg.vprefix_len = vprefix_len;
++    cfg.vhost6 = vhost6;
++    cfg.vhostname = vhostname;
++    cfg.tftp_server_name = tftp_server_name;
++    cfg.tftp_path = tftp_path;
++    cfg.bootfile = bootfile;
++    cfg.vdhcp_start = vdhcp_start;
++    cfg.vnameserver = vnameserver;
++    cfg.vnameserver6 = vnameserver6;
++    cfg.vdnssearch = vdnssearch;
++    cfg.vdomainname = vdomainname;
++    return slirp_new(&cfg, callbacks, opaque);
++}
++
++/*
++ * Destroy a Slirp instance: frees the guest forwarding list, tears down
++ * the IP/IPv6/mbuf state and releases every string duplicated by
++ * slirp_new() before freeing the instance itself.
++ */
++void slirp_cleanup(Slirp *slirp)
++{
++    struct gfwd_list *e, *next;
++
++    for (e = slirp->guestfwd_list; e; e = next) {
++        next = e->ex_next;
++        g_free(e->ex_exec);
++        g_free(e->ex_unix);
++        g_free(e);
++    }
++
++    ip_cleanup(slirp);
++    ip6_cleanup(slirp);
++    m_cleanup(slirp);
++
++    g_rand_free(slirp->grand);
++
++    g_free(slirp->vdnssearch);
++    g_free(slirp->tftp_prefix);
++    g_free(slirp->bootp_filename);
++    /* g_strdup()ed in slirp_new(); g_free(NULL) is a no-op */
++    g_free(slirp->tftp_server_name);
++    
g_free(slirp->vdomainname);
++    g_free(slirp);
++}
++
++/* true when the socket is connected and may still be written to */
++#define CONN_CANFSEND(so) \
++    (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED)
++/* true when the socket is connected and may still be read from */
++#define CONN_CANFRCV(so) \
++    (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED)
++
++/*
++ * Lower *timeout (milliseconds) so that pending slirp timers fire on time.
++ * Values already at or below TIMEOUT_FAST are left alone; otherwise the
++ * result is capped at 1000, at TIMEOUT_SLOW when a slow timer is wanted,
++ * and set to TIMEOUT_FAST when a fast timer is pending.
++ */
++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout)
++{
++    uint32_t t;
++
++    if (*timeout <= TIMEOUT_FAST) {
++        return;
++    }
++
++    t = MIN(1000, *timeout);
++
++    /* If we have tcp timeout with slirp, then we will fill @timeout with
++     * more precise value.
++     */
++    if (slirp->time_fasttimo) {
++        *timeout = TIMEOUT_FAST;
++        return;
++    }
++    if (slirp->do_slowtimo) {
++        t = MIN(TIMEOUT_SLOW, t);
++    }
++    *timeout = t;
++}
++
++/*
++ * Register every socket slirp wants to be polled through add_poll() and
++ * adjust *timeout for pending timers. The value returned by add_poll() is
++ * remembered in so->pollfds_idx (-1 when the socket is not polled).
++ * Expired UDP and ICMP sockets are detached on the way.
++ */
++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout,
++                        SlirpAddPollCb add_poll, void *opaque)
++{
++    struct socket *so, *so_next;
++
++    /*
++     * First, TCP sockets
++     */
++
++    /*
++     * *_slowtimo needs calling if there are IP fragments
++     * in the fragment queue, or there are TCP connections active
++     */
++    slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) ||
++                          (&slirp->ipq.ip_link != slirp->ipq.ip_link.next));
++
++    for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) {
++        int events = 0;
++
++        so_next = so->so_next;
++
++        so->pollfds_idx = -1;
++
++        /*
++         * See if we need a tcp_fasttimo
++         */
++        if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) {
++            slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */
++        }
++
++        /*
++         * NOFDREF can include still connecting to local-host,
++         * newly socreated() sockets etc. Don't want to select these.
++         */
++        if (so->so_state & SS_NOFDREF || so->s == -1) {
++            continue;
++        }
++
++        /*
++         * Set for reading sockets which are accepting
++         */
++        if (so->so_state & SS_FACCEPTCONN) {
++            so->pollfds_idx = add_poll(
++                so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque);
++            continue;
++        }
++
++        /*
++         * Set for writing sockets which are connecting
++         */
++        if (so->so_state & SS_ISFCONNECTING) {
++            so->pollfds_idx =
++                add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque);
++            continue;
++        }
++
++        /*
++         * Set for writing if we are connected, can send more, and
++         * we have something to send
++         */
++        if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) {
++            events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR;
++        }
++
++        /*
++         * Set for reading (and urgent data) if we are connected, can
++         * receive more, and we have room for it XXX /2 ?
++         */
++        if (CONN_CANFRCV(so) &&
++            (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) {
++            events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR |
++                      SLIRP_POLL_PRI;
++        }
++
++        if (events) {
++            so->pollfds_idx = add_poll(so->s, events, opaque);
++        }
++    }
++
++    /*
++     * UDP sockets
++     */
++    for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) {
++        so_next = so->so_next;
++
++        so->pollfds_idx = -1;
++
++        /*
++         * See if it's timed out
++         */
++        if (so->so_expire) {
++            if (so->so_expire <= curtime) {
++                udp_detach(so);
++                continue;
++            } else {
++                slirp->do_slowtimo = true; /* Let socket expire */
++            }
++        }
++
++        /*
++         * When UDP packets are received from over the
++         * link, they're sendto()'d straight away, so
++         * no need for setting for writing
++         * Limit the number of packets queued by this session
++         * to 4. Note that even though we try and limit this
++         * to 4 packets, the session could have more queued
++         * if the packets needed to be fragmented
++         * (XXX <= 4 ?)
++         */
++        if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) {
++            so->pollfds_idx = add_poll(
++                so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque);
++        }
++    }
++
++    /*
++     * ICMP sockets
++     */
++    for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) {
++        so_next = so->so_next;
++
++        so->pollfds_idx = -1;
++
++        /*
++         * See if it's timed out
++         */
++        if (so->so_expire) {
++            if (so->so_expire <= curtime) {
++                icmp_detach(so);
++                continue;
++            } else {
++                slirp->do_slowtimo = true; /* Let socket expire */
++            }
++        }
++
++        if (so->so_state & SS_ISFCONNECTED) {
++            so->pollfds_idx = add_poll(
++                so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque);
++        }
++    }
++
++    slirp_update_timeout(slirp, timeout);
++}
++
++/*
++ * Run slirp after the caller's poll returned.
++ *
++ * Refreshes curtime from the clock callback, runs the fast and slow
++ * timers when they are due and, unless select_error is set, services every
++ * TCP, UDP and ICMP socket according to the events reported by
++ * get_revents() for its pollfds_idx. Queued output is flushed with
++ * if_start() at the end.
++ */
++void slirp_pollfds_poll(Slirp *slirp, int select_error,
++                        SlirpGetREventsCb get_revents, void *opaque)
++{
++    struct socket *so, *so_next;
++    int ret;
++
++    curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS;
++
++    /*
++     * See if anything has timed out
++     */
++    if (slirp->time_fasttimo &&
++        ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) {
++        tcp_fasttimo(slirp);
++        slirp->time_fasttimo = 0;
++    }
++    if (slirp->do_slowtimo &&
++        ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) {
++        ip_slowtimo(slirp);
++        tcp_slowtimo(slirp);
++        slirp->last_slowtimo = curtime;
++    }
++
++    /*
++     * Check sockets
++     */
++    if (!select_error) {
++        /*
++         * Check TCP sockets
++         */
++        for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) {
++            int revents;
++
++            so_next = so->so_next;
++
++            revents = 0;
++            if (so->pollfds_idx != -1) {
++                revents = get_revents(so->pollfds_idx, opaque);
++            }
++
++            if (so->so_state & SS_NOFDREF || so->s == -1) {
++                continue;
++            }
++
++            /*
++             * Check for URG data
++             * This will soread as well, so no need to
++             * test for SLIRP_POLL_IN below if this succeeds
++             */
++            if (revents & SLIRP_POLL_PRI) {
++                ret = sorecvoob(so);
++                if (ret < 0) {
++                    /* Socket error might have resulted in the socket being
++                     * removed, do not try to do anything more with it. */
++                    continue;
++                }
++            }
++            /*
++             * Check sockets for reading
++             */
++            else if (revents &
++                     (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) {
++                /*
++                 * Check for incoming connections
++                 */
++                if (so->so_state & SS_FACCEPTCONN) {
++                    tcp_connect(so);
++                    continue;
++                } /* else */
++                ret = soread(so);
++
++                /* Output it if we read something */
++                if (ret > 0) {
++                    tcp_output(sototcpcb(so));
++                }
++                if (ret < 0) {
++                    /* Socket error might have resulted in the socket being
++                     * removed, do not try to do anything more with it. */
++                    continue;
++                }
++            }
++
++            /*
++             * Check sockets for writing
++             */
++            if (!(so->so_state & SS_NOFDREF) &&
++                (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) {
++                /*
++                 * Check for non-blocking, still-connecting sockets
++                 */
++                if (so->so_state & SS_ISFCONNECTING) {
++                    /* Connected */
++                    so->so_state &= ~SS_ISFCONNECTING;
++
++                    ret = send(so->s, (const void *)&ret, 0, 0);
++                    if (ret < 0) {
++                        /* XXXXX Must fix, zero bytes is a NOP */
++                        if (errno == EAGAIN || errno == EWOULDBLOCK ||
++                            errno == EINPROGRESS || errno == ENOTCONN) {
++                            continue;
++                        }
++
++                        /* else failed */
++                        so->so_state &= SS_PERSISTENT_MASK;
++                        so->so_state |= SS_NOFDREF;
++                    }
++                    /* else so->so_state &= ~SS_ISFCONNECTING; */
++
++                    /*
++                     * Continue tcp_input
++                     */
++                    tcp_input((struct mbuf *)NULL, sizeof(struct ip), so,
++                              so->so_ffamily);
++                    /* continue; */
++                } else {
++                    ret = sowrite(so);
++                    if (ret > 0) {
++                        /* Call tcp_output in case we need to send a window
++                         * update to the guest, otherwise it will be stuck
++                         * until it sends a window probe. */
++                        tcp_output(sototcpcb(so));
++                    }
++                }
++            }
++        }
++
++        /*
++         * Now UDP sockets.
++         * Incoming packets are sent straight away, they're not buffered.
++         * Incoming UDP data isn't buffered either.
++         */
++        for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) {
++            int revents;
++
++            so_next = so->so_next;
++
++            revents = 0;
++            if (so->pollfds_idx != -1) {
++                revents = get_revents(so->pollfds_idx, opaque);
++            }
++
++            if (so->s != -1 &&
++                (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) {
++                sorecvfrom(so);
++            }
++        }
++
++        /*
++         * Check incoming ICMP replies.
++         */
++        for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) {
++            int revents;
++
++            so_next = so->so_next;
++
++            revents = 0;
++            if (so->pollfds_idx != -1) {
++                revents = get_revents(so->pollfds_idx, opaque);
++            }
++
++            if (so->s != -1 &&
++                (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) {
++                icmp_receive(so);
++            }
++        }
++    }
++
++    if_start(slirp);
++}
++
++/*
++ * Handle an ARP frame from the guest.
++ *
++ * pkt points at the Ethernet header, pkt_len is the full frame length.
++ * Requests and replies teach the ARP table the sender's address; requests
++ * for the virtual host, the virtual name server or a guestfwd address
++ * inside the virtual network are answered with a MAC built from
++ * special_ethaddr and the target IP. Nothing is done when IPv4 is
++ * disabled.
++ */
++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
++{
++    struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN);
++    uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)];
++    struct ethhdr *reh = (struct ethhdr *)arp_reply;
++    struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN);
++    int ar_op;
++    struct gfwd_list *ex_ptr;
++
++    if (!slirp->in_enabled) {
++        return;
++    }
++
++    /*
++     * The ARP header is read below; drop frames that are too short to
++     * contain it instead of reading past the end of the packet.
++     */
++    if (pkt_len < 0 || (size_t)pkt_len < ETH_HLEN + sizeof(*ah)) {
++        return;
++    }
++
++    ar_op = ntohs(ah->ar_op);
++    switch (ar_op) {
++    case ARPOP_REQUEST:
++        if (ah->ar_tip == ah->ar_sip) {
++            /* Gratuitous ARP */
++            arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
++            return;
++        }
++
++        if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) ==
++            slirp->vnetwork_addr.s_addr) {
++            if (ah->ar_tip == slirp->vnameserver_addr.s_addr ||
++                ah->ar_tip == slirp->vhost_addr.s_addr)
++                goto arp_ok;
++            /* TODO: IPv6 */
++            for (ex_ptr = slirp->guestfwd_list; ex_ptr;
++                 ex_ptr = ex_ptr->ex_next) {
++                if (ex_ptr->ex_addr.s_addr == ah->ar_tip)
++                    goto arp_ok;
++            }
++            return;
++        arp_ok:
++            memset(arp_reply, 0, sizeof(arp_reply));
++
++            arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
++
++            /* ARP request for alias/dns mac address */
++            memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN);
++            memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4);
++            memcpy(&reh->h_source[2], &ah->ar_tip, 4);
++            reh->h_proto = htons(ETH_P_ARP);
++
++            rah->ar_hrd = htons(1);
++            rah->ar_pro = htons(ETH_P_IP);
++            rah->ar_hln = ETH_ALEN;
++            rah->ar_pln = 4;
++            rah->ar_op = htons(ARPOP_REPLY);
++            memcpy(rah->ar_sha, reh->h_source, ETH_ALEN);
++            rah->ar_sip = ah->ar_tip;
++            memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN);
++            rah->ar_tip = ah->ar_sip;
++            slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply));
++        }
++        break;
++    case ARPOP_REPLY:
++        arp_table_add(slirp, ah->ar_sip, ah->ar_sha);
++        break;
++    default:
++        break;
++    }
++}
++
++/*
++ * Entry point for Ethernet frames coming from the guest.
++ *
++ * Frames shorter than an Ethernet header are dropped. ARP and NC-SI
++ * frames are handled directly; IPv4/IPv6 frames are copied into an mbuf
++ * (leaving room for the tcpiphdr overhead plus 2 alignment bytes) and
++ * passed to ip_input()/ip6_input() with the Ethernet header stripped.
++ * Other protocols are ignored.
++ */
++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
++{
++    struct mbuf *m;
++    int proto;
++
++    if (pkt_len < ETH_HLEN)
++        return;
++
++    /* EtherType, big-endian, at offset 12 of the Ethernet header */
++    proto = (((uint16_t)pkt[12]) << 8) + pkt[13];
++    switch (proto) {
++    case ETH_P_ARP:
++        arp_input(slirp, pkt, pkt_len);
++        break;
++    case ETH_P_IP:
++    case ETH_P_IPV6:
++        m = m_get(slirp);
++        if (!m)
++            return;
++        /* Note: we add 2 to align the IP header on 4 bytes,
++         * and add the margin for the tcpiphdr overhead */
++        if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) {
++            m_inc(m, pkt_len + TCPIPHDR_DELTA + 2);
++        }
++        m->m_len = pkt_len + TCPIPHDR_DELTA + 2;
++        memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len);
++
++        m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN;
++        m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN;
++
++        if (proto == ETH_P_IP) {
++            ip_input(m);
++        } else if (proto == ETH_P_IPV6) {
++            ip6_input(m);
++        }
++        break;
++
++    case ETH_P_NCSI:
++        ncsi_input(slirp, pkt, pkt_len);
++        break;
++
++    default:
++        break;
++    }
++}
++
++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no
++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet
++ * is ready to go.
++ */
++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh,
++                     uint8_t ethaddr[ETH_ALEN])
++{
++    const struct ip *iph = (const struct ip *)ifm->m_data;
++    uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)];
++    struct ethhdr *req_eh;
++    struct slirp_arphdr *req_arp;
++
++    if (arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) {
++        /* Destination MAC is known: fill in source and protocol */
++        memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4);
++        /* XXX: not correct */
++        memcpy(&eh->h_source[2], &slirp->vhost_addr, 4);
++        eh->h_proto = htons(ETH_P_IP);
++
++        /* Send this */
++        return 2;
++    }
++
++    /* Unknown destination: only one ARP request is sent per packet */
++    if (ifm->resolution_requested) {
++        return 0;
++    }
++
++    req_eh = (struct ethhdr *)arp_req;
++    req_arp = (struct slirp_arphdr *)(arp_req + ETH_HLEN);
++
++    /* Ethernet header: broadcast from the virtual host's MAC */
++    memset(req_eh->h_dest, 0xff, ETH_ALEN);
++    memcpy(req_eh->h_source, special_ethaddr, ETH_ALEN - 4);
++    memcpy(&req_eh->h_source[2], &slirp->vhost_addr, 4);
++    req_eh->h_proto = htons(ETH_P_ARP);
++
++    /* ARP request header */
++    req_arp->ar_hrd = htons(1);
++    req_arp->ar_pro = htons(ETH_P_IP);
++    req_arp->ar_hln = ETH_ALEN;
++    req_arp->ar_pln = 4;
++    req_arp->ar_op = htons(ARPOP_REQUEST);
++
++    /* sender: virtual host MAC and IP */
++    memcpy(req_arp->ar_sha, special_ethaddr, ETH_ALEN - 4);
++    memcpy(&req_arp->ar_sha[2], &slirp->vhost_addr, 4);
++    req_arp->ar_sip = slirp->vhost_addr.s_addr;
++
++    /* target: MAC still unknown, IP is the packet's destination */
++    memset(req_arp->ar_tha, 0, ETH_ALEN);
++    req_arp->ar_tip = iph->ip_dst.s_addr;
++
++    slirp->client_ipaddr = iph->ip_dst;
++    slirp_send_packet_all(slirp, arp_req, sizeof(arp_req));
++    ifm->resolution_requested = true;
++
++    /* Expire request and drop outgoing packet after 1 second */
++    ifm->expiration_date =
++        slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL;
++
++    return 0;
++}
++
++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no
++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet
++ * is ready to go.
++ */
++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh,
++                     uint8_t ethaddr[ETH_ALEN])
++{
++    const struct ip6 *ip6h = mtod(ifm, const struct ip6 *);
++
++    if (ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) {
++        /* Neighbour is known: fill in protocol and source MAC */
++        eh->h_proto = htons(ETH_P_IPV6);
++        in6_compute_ethaddr(ip6h->ip_src, eh->h_source);
++
++        /* Send this */
++        return 2;
++    }
++
++    /* Unknown neighbour: solicit once, then keep the packet queued */
++    if (!ifm->resolution_requested) {
++        ndp_send_ns(slirp, ip6h->ip_dst);
++        ifm->resolution_requested = true;
++        ifm->expiration_date =
++            slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL;
++    }
++    return 0;
++}
++
++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be
++ * re-queued.
++ */
++int if_encap(Slirp *slirp, struct mbuf *ifm)
++{
++    uint8_t frame[IF_MTU_MAX + 100];
++    struct ethhdr *eh = (struct ethhdr *)frame;
++    uint8_t ethaddr[ETH_ALEN];
++    const struct ip *iph = (const struct ip *)ifm->m_data;
++    int version;
++    int ret;
++
++    /* Packets that do not fit the frame buffer are reported as handled */
++    if (ifm->m_len + ETH_HLEN > sizeof(frame)) {
++        return 1;
++    }
++
++    version = iph->ip_v;
++    if (version == IPVERSION) {
++        ret = if_encap4(slirp, ifm, eh, ethaddr);
++        if (ret < 2) {
++            return ret;
++        }
++    } else if (version == IP6VERSION) {
++        ret = if_encap6(slirp, ifm, eh, ethaddr);
++        if (ret < 2) {
++            return ret;
++        }
++    } else {
++        g_assert_not_reached();
++    }
++
++    /* Header is ready: add the destination, append the payload and send */
++    memcpy(eh->h_dest, ethaddr, ETH_ALEN);
++    DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_source[0],
++              eh->h_source[1], eh->h_source[2], eh->h_source[3],
++              eh->h_source[4], eh->h_source[5]);
++    DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_dest[0],
++              eh->h_dest[1], eh->h_dest[2], eh->h_dest[3], eh->h_dest[4],
++              eh->h_dest[5]);
++    memcpy(frame + sizeof(struct ethhdr), ifm->m_data, ifm->m_len);
++    slirp_send_packet_all(slirp, frame, ifm->m_len + ETH_HLEN);
++    return 1;
++}
++
++/* Drop host forwarding rule, return 0 if found.
*/ ++/* TODO: IPv6 */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr.sin_addr.s_addr == host_addr.s_addr && addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++/* TODO: IPv6 */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; 
++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). 
++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000..32634bc +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,283 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ 
++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#if !defined(__HAIKU__) ++#define O_BINARY 0 ++#endif ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. */ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start 
recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void 
ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000..4cd9a64 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,957 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. 
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > 2048) ++ so->so_urgc = 2048; /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, ++ * updating all sbuf field as necessary ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if 
(so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if (sosendoob(so) < expected) { ++ /* Treat a short write as a fatal error too, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Check if there's urgent data to send, and if so, send it */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ char buff[256]; ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! 
*/ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ if (n == 0) { ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ 
g_assert_not_reached(); ++ break; ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ */ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int 
flags) ++{ ++ /* TODO: IPv6 */ ++ struct sockaddr_in addr; ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen = sizeof(addr); ++ memset(&addr, 0, addrlen); ++ ++ DEBUG_CALL("tcp_listen"); ++ DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){ .s_addr = haddr })); ++ DEBUG_ARG("hport = %d", ntohs(hport)); ++ DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){ .s_addr = laddr })); ++ DEBUG_ARG("lport = %d", ntohs(lport)); ++ DEBUG_ARG("flags = %x", flags); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { ++ g_free(so); ++ return NULL; ++ } ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. ++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; /* Kept in network format */ ++ so->so_laddr.s_addr = laddr; /* Ditto */ ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = haddr; ++ addr.sin_port = hport; ++ ++ if (((s = slirp_socket(AF_INET, SOCK_STREAM, 0)) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); ++ ++ getsockname(s, (struct sockaddr *)&addr, &addrlen); ++ so->so_ffamily = AF_INET; ++ so->so_fport = addr.sin_port; ++ if (addr.sin_addr.s_addr == 0 || ++ addr.sin_addr.s_addr == loopback_addr.s_addr) ++ so->so_faddr = slirp->vhost_addr; ++ else ++ so->so_faddr = 
addr.sin_addr; ++ ++ so->s = s; ++ return so; ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ 
return true; ++} ++/* IPv6 counterpart of sotranslate_out4(); on a successful DNS lookup it also stores the returned scope id in sin->sin6_scope_id. */ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, &s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr into a host address when it is a virtual address; returns 0, or -1 with errno set to EPERM when the translation fails ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++/* Rewrite the address stored in addr when the socket's foreign address lies inside the virtual network (IPv4) or the virtual prefix (IPv6); other families are left untouched. */ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++/* all host bits set, i.e. the broadcast address of the virtual network */ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++
sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname (an unspecified or loopback foreign address is replaced by the virtual host address) ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) == ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++/* Drop num bytes from the send buffer through sbdrop() and invoke the notify callback when sbdrop() returns non-zero. */ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000..a6a1e5e +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,164 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski.
++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++/* Socket expiry intervals; presumably milliseconds - TODO confirm against the users of so_expire */ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* ++ * Our socket structure ++ */ ++/* One address slot that can be viewed as generic storage, as IPv4 or as IPv6 */ ++union slirp_sockaddr { ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++/* The so_fXXX and so_lXXX macros defined inside the struct alias members of fhost and lhost */ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; /* guest forward entry bound to this socket, if any */ ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated?
*/ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 /* NOTE(review): undocumented in this header - confirm meaning against its users */ ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++/* Return non-zero when a and b have the same family, address and port; a matching family other than AF_INET or AF_INET6 trips g_assert_not_reached(). */ ++static inline int sockaddr_equal(struct sockaddr_storage *a, ++ struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ struct sockaddr_in *a4 = (struct sockaddr_in *)a; ++ struct sockaddr_in *b4 = (struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr
== b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a; ++ struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++/* Size of the concrete sockaddr matching the family of a (sockaddr_in or sockaddr_in6); any other family trips g_assert_not_reached(). */ ++static inline socklen_t sockaddr_size(struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++/* Socket layer entry points */ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++/* Translation between virtual addresses and host addresses */ ++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++ ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000..22af77b +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person
obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++/* post_load hook of vmstate_slirp_tcp: calls tcp_template() on the loaded tcpcb and reports success. */ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++/* Migration description of the struct tcpcb members listed below. */ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct
tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++/* pre_save: turn the parent's write and read pointers into offsets from sb_data. */ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++/* post_load: reserve sb_datalen bytes, reject offsets that are not below that length with -EINVAL, then rebuild both pointers from the offsets. */ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++/* Fields of the temporary: woff is listed first, then roff. */ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++/* sbuf description: sb_cc, sb_datalen, the offset pair handled through sbuf_tmp, then the sb_data buffer sized by sb_datalen. */ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ VMSTATE_END_OF_LIST() } ++}; ++/* Field test: true for streams older than version 4. */ ++static bool slirp_older_than_v4(void
*opaque, int version_id) ++{ ++ return version_id < 4; ++} ++/* Field test: true when the sockaddr passed as opaque has family AF_INET. */ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++/* pre_load hook of vmstate_slirp_socket: runs tcp_attach() on the socket and defaults both address families to AF_INET. */ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ ++ tcp_attach(so); ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++/* VMSTATE_SIN4_ADDR(f, s, t): conditional 32-bit field used for IPv4 s_addr members */ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; its size ++ * and type vary (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++/* Portable family codes stored in portable_family */ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win...
*/ ++#define SS_FAMILY_MIG_IPV6 10 /* Linux */ ++#define SS_FAMILY_MIG_OTHER 0xffff /* any family other than AF_INET and AF_INET6 */ ++/* pre_save: map the native ss_family to a portable code; stays SS_FAMILY_MIG_OTHER unless the family is AF_INET or AF_INET6. */ ++static int ss_family_pre_save(void *opaque) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ tss->portable_family = SS_FAMILY_MIG_OTHER; ++ ++ if (tss->parent->ss.ss_family == AF_INET) { ++ tss->portable_family = SS_FAMILY_MIG_IPV4; ++ } else if (tss->parent->ss.ss_family == AF_INET6) { ++ tss->portable_family = SS_FAMILY_MIG_IPV6; ++ } ++ ++ return 0; ++} ++/* post_load: map the portable code back to the native family; an unknown code is logged and rejected with -EINVAL. */ ++static int ss_family_post_load(void *opaque, int version_id) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ switch (tss->portable_family) { ++ case SS_FAMILY_MIG_IPV4: ++ tss->parent->ss.ss_family = AF_INET; ++ break; ++ case SS_FAMILY_MIG_IPV6: ++ case 23: /* compatibility: AF_INET6 from mingw */ ++ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ ++ tss->parent->ss.ss_family = AF_INET6; ++ break; ++ default: ++ g_critical("invalid ss_family type %x", tss->portable_family); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++/* Carries the address family as the single 16-bit portable_family value. */ ++static const VMStateDescription vmstate_slirp_ss_family = { ++ .name = "slirp-socket-addr/ss_family", ++ .pre_save = ss_family_pre_save, ++ .post_load = ss_family_post_load, ++ .fields = ++ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), ++ VMSTATE_END_OF_LIST() } ++}; ++/* Socket address: the family first, then IPv4 address and port guarded by slirp_family_inet; the IPv6 members are compiled out with #if 0. */ ++static const VMStateDescription vmstate_slirp_socket_addr = { ++ .name = "slirp-socket-addr", ++ .version_id = 4, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, ++ vmstate_slirp_ss_family), ++ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, ++ slirp_family_inet), ++ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, ++ slirp_family_inet), ++ ++#if 0 ++ /* Untested: Needs checking by someone with IPv6 test */ ++ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union
slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, ++ slirp_family_inet6), ++#endif ++ ++ VMSTATE_END_OF_LIST() } ++}; ++/* One migrated socket: streams older than v4 carry bare IPv4 address and port members, v4 and newer carry fhost and lhost. */ ++static const VMStateDescription vmstate_slirp_socket = { ++ .name = "slirp-socket", ++ .version_id = 4, ++ .pre_load = slirp_socket_pre_load, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_UINT32(so_urgc, struct socket), ++ /* Pre-v4 versions */ ++ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), ++ /* v4 and newer */ ++ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ ++ VMSTATE_UINT8(so_iptos, struct socket), ++ VMSTATE_UINT8(so_emu, struct socket), ++ VMSTATE_UINT8(so_type, struct socket), ++ VMSTATE_INT32(so_state, struct socket), ++ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, ++ struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++/* One BOOTPClient entry: its allocated member and its macaddr buffer. */ ++static const VMStateDescription vmstate_slirp_bootp_client = { ++ .name = "slirp_bootpclient", ++ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), ++ VMSTATE_BUFFER(macaddr, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++/* Top-level Slirp state: ip_id and the bootp_clients array. */ ++static const VMStateDescription vmstate_slirp = { ++ .name = "slirp", ++ .version_id = 4, ++ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), ++ VMSTATE_STRUCT_ARRAY( ++ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, ++ vmstate_slirp_bootp_client, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++/* Write the state through write_cb: every saved socket is preceded by a byte 42, a byte 0 ends the socket list, then the top-level state follows. */ ++void slirp_state_save(Slirp *slirp, SlirpWriteCb
write_cb, void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpOStream f = { ++ .write_cb = write_cb, ++ .opaque = opaque, ++ }; ++/* only guest forwards that have a write callback and a matching control socket are saved */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) ++ if (ex_ptr->write_cb) { ++ struct socket *so; ++ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, ++ ntohs(ex_ptr->ex_fport)); ++ if (!so) { ++ continue; ++ } ++/* non-zero marker byte: the loader keeps reading sockets while this byte is non-zero */ ++ slirp_ostream_write_u8(&f, 42); ++ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); ++ } ++ slirp_ostream_write_u8(&f, 0); ++ ++ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); ++} ++ ++/* Load a state stream through read_cb; returns a negative value on failure, -EINVAL when a loaded socket lies outside the virtual network or matches no guest forward. */ ++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, ++ void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpIStream f = { ++ .read_cb = read_cb, ++ .opaque = opaque, ++ }; ++ ++ while (slirp_istream_read_u8(&f)) { ++ int ret; ++ struct socket *so = socreate(slirp); ++ ++ ret = ++ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); ++ if (ret < 0) { ++ return ret; ++ } ++/* the loaded foreign address must lie inside the virtual network */ ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr) { ++ return -EINVAL; ++ } ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->write_cb && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && ++ so->so_fport == ex_ptr->ex_fport) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ return -EINVAL; ++ } ++ ++ so->guestfwd = ex_ptr; ++ } ++ ++ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); ++} ++/* Current version number of the state stream. */ ++int slirp_state_version(void) ++{ ++ return 4; ++} +diff --git a/slirp/src/stream.c b/slirp/src/stream.c +new file mode 100644 +index 0000000..6cf326f +--- /dev/null ++++ b/slirp/src/stream.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp io streams ++ * ++ * Copyright (c) 2018 Red Hat, Inc.
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include <glib.h> ++/* Read through read_cb; true only when the callback reports exactly size bytes. */ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++/* Write through write_cb; true only when the callback reports exactly size bytes. */ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++/* Read one byte; returns 0 when the read fails, so a failed read cannot be told apart from a stored 0. */ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++/* Write one byte; true on success. */ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++/* The 16-bit and 32-bit helpers below use big-endian order on the stream; readers return 0 when the read fails. */ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000..08bb5b6 +--- /dev/null ++++
b/slirp/src/stream.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef STREAM_H_ ++#define STREAM_H_ ++ ++#include "libslirp.h" ++/* Input stream: a read callback plus the opaque pointer handed to it. */ ++typedef struct SlirpIStream { ++ SlirpReadCb read_cb; ++ void *opaque; ++} SlirpIStream; ++/* Output stream: a write callback plus the opaque pointer handed to it. */ ++typedef struct SlirpOStream { ++ SlirpWriteCb write_cb; ++ void *opaque; ++} SlirpOStream; ++/* Raw transfers: true when the callback handled exactly size bytes. */ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); ++/* Fixed-width helpers: multi-byte values are big-endian on the stream, readers return 0 on a failed read. */ ++uint8_t slirp_istream_read_u8(SlirpIStream *f); ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f); ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f); ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f); ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f); ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); ++ ++#endif /* STREAM_H_ */ +diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h +new file mode 100644 +index 0000000..70a9760 +--- /dev/null ++++ b/slirp/src/tcp.h +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include <glib.h> ++/* TCP sequence number: 32 bits, compared with the modular SEQ_ macros of this header */ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++/* Space constants are parenthesized so that they expand safely inside larger expressions */ ++#define TCP_SNDSPACE (1024 * 128) ++#define TCP_RCVSPACE (1024 * 128) ++#define TCP_MAXSEG_MAX 32768 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981.
++ */ ++#define tcphdr slirp_tcphdr /* the struct below is compiled as slirp_tcphdr; presumably avoids a clash with a system tcphdr - confirm */ ++struct tcphdr { ++ uint16_t th_sport; /* source port */ ++ uint16_t th_dport; /* destination port */ ++ tcp_seq th_seq; /* sequence number */ ++ tcp_seq th_ack; /* acknowledgement number */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t th_off : 4, /* data offset */ ++ th_x2 : 4; /* (unused) */ ++#else ++ uint8_t th_x2 : 4, /* (unused) */ ++ th_off : 4; /* data offset */ ++#endif ++ uint8_t th_flags; /* TH_ flag bits, see below */ ++ uint16_t th_win; /* window */ ++ uint16_t th_sum; /* checksum */ ++ uint16_t th_urp; /* urgent pointer */ ++}; ++ ++#include "tcp_var.h" ++/* Header flag bits; only defined here when TH_FIN is not already defined */ ++#ifndef TH_FIN ++#define TH_FIN 0x01 ++#define TH_SYN 0x02 ++#define TH_RST 0x04 ++#define TH_PUSH 0x08 ++#define TH_ACK 0x10 ++#define TH_URG 0x20 ++#endif ++/* TCP option kinds; only defined here when TCPOPT_EOL is not already defined */ ++#ifndef TCPOPT_EOL ++#define TCPOPT_EOL 0 ++#define TCPOPT_NOP 1 ++#define TCPOPT_MAXSEG 2 ++#define TCPOPT_WINDOW 3 ++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ ++#define TCPOPT_SACK 5 /* Experimental */ ++#define TCPOPT_TIMESTAMP 8 ++ ++#define TCPOPT_TSTAMP_HDR \ ++ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ ++ TCPOLEN_TIMESTAMP) ++#endif ++/* TCP option lengths; only defined here when TCPOLEN_MAXSEG is not already defined */ ++#ifndef TCPOLEN_MAXSEG ++#define TCPOLEN_MAXSEG 4 ++#define TCPOLEN_WINDOW 3 ++#define TCPOLEN_SACK_PERMITTED 2 ++#define TCPOLEN_TIMESTAMP 10 ++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ ++#endif ++ ++#undef TCP_MAXWIN ++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ ++ ++#undef TCP_MAX_WINSHIFT ++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ ++ ++/* ++ * User-settable options (used with setsockopt). ++ * ++ * We don't use the system headers on unix because we have conflicting ++ * local structures. We can't avoid the system definitions on Windows, ++ * so we undefine them. ++ */ ++#undef TCP_NODELAY ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#undef TCP_MAXSEG ++ ++/* ++ * TCP FSM state definitions. ++ * Per RFC793, September, 1981.
++ */ ++ ++#define TCP_NSTATES 11 /* number of TCPS_ states below (0 to 10) */ ++ ++#define TCPS_CLOSED 0 /* closed */ ++#define TCPS_LISTEN 1 /* listening for connection */ ++#define TCPS_SYN_SENT 2 /* active, have sent syn */ ++#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ ++/* states < TCPS_ESTABLISHED are those where connections not established */ ++#define TCPS_ESTABLISHED 4 /* established */ ++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ ++/* states > TCPS_CLOSE_WAIT are those where user has closed */ ++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ ++#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ ++#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ ++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ ++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ ++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ ++/* State predicates; they rely on the numeric ordering of the TCPS_ values above */ ++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) ++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) ++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) ++ ++/* ++ * TCP sequence numbers are 32 bit integers operated ++ * on with modular arithmetic. These macros can be ++ * used to compare such integers. ++ */ ++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) ++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) ++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) ++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Macros to initialize tcp sequence numbers for ++ * send and receive from initial send and receive ++ * sequence numbers.
++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 ++/* Both macros expand to a bare assignment expression, so use them only as complete statements */ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif /* TCP_H */ +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000..d55b0c8 +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1539 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 ++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#define TCPREXMTTHRESH 3 ++ ++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) ++ ++/* for modulo comparisons of timestamps */ ++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) ++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Insert segment ti into reassembly queue of tcp with ++ * control block tp. Return TH_FIN if reassembly now includes ++ * a segment with FIN. The macro form does the common case inline ++ * (segment is the next to be received on an established connection, ++ * and the queue is empty), avoiding linkage into and removal ++ * from the queue and repetition of various conversions. ++ * Set DELACK for segments received in order, but ack immediately ++ * when segments are out of order (so fast retransmit can work). 
++ */ ++#define TCP_REASS(tp, ti, m, so, flags) \ ++ { \ ++ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ ++ (tp)->t_state == TCPS_ESTABLISHED) { \ ++ tp->t_flags |= TF_DELACK; \ ++ (tp)->rcv_nxt += (ti)->ti_len; \ ++ flags = (ti)->ti_flags & TH_FIN; \ ++ if (so->so_emu) { \ ++ if (tcp_emu((so), (m))) \ ++ sbappend(so, (m)); \ ++ } else \ ++ sbappend((so), (m)); \ ++ } else { \ ++ (flags) = tcp_reass((tp), (ti), (m)); \ ++ tp->t_flags |= TF_ACKNOW; \ ++ } \ ++ } ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti); ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); ++ ++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, ++ struct mbuf *m) ++{ ++ register struct tcpiphdr *q; ++ struct socket *so = tp->t_socket; ++ int flags; ++ ++ /* ++ * Call with ti==NULL after become established to ++ * force pre-ESTABLISHED data up to user socket. ++ */ ++ if (ti == NULL) ++ goto present; ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); ++ q = tcpiphdr_next(q)) ++ if (SEQ_GT(q->ti_seq, ti->ti_seq)) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { ++ register int i; ++ q = tcpiphdr_prev(q); ++ /* conversion to int (in i) handles seq wraparound */ ++ i = q->ti_seq + q->ti_len - ti->ti_seq; ++ if (i > 0) { ++ if (i >= ti->ti_len) { ++ m_free(m); ++ /* ++ * Try to present any queued data ++ * at the left window edge to the user. ++ * This is needed after the 3-WHS ++ * completes. ++ */ ++ goto present; /* ??? 
*/ ++ } ++ m_adj(m, i); ++ ti->ti_len -= i; ++ ti->ti_seq += i; ++ } ++ q = tcpiphdr_next(q); ++ } ++ ti->ti_mbuf = m; ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (!tcpfrag_list_end(q, tp)) { ++ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; ++ if (i <= 0) ++ break; ++ if (i < q->ti_len) { ++ q->ti_seq += i; ++ q->ti_len -= i; ++ m_adj(q->ti_mbuf, i); ++ break; ++ } ++ q = tcpiphdr_next(q); ++ m = tcpiphdr_prev(q)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); ++ m_free(m); ++ } ++ ++ /* ++ * Stick new segment in its place. ++ */ ++ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); ++ ++present: ++ /* ++ * Present data to user, advancing rcv_nxt through ++ * completed sequence space. ++ */ ++ if (!TCPS_HAVEESTABLISHED(tp->t_state)) ++ return (0); ++ ti = tcpfrag_list_first(tp); ++ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) ++ return (0); ++ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) ++ return (0); ++ do { ++ tp->rcv_nxt += ti->ti_len; ++ flags = ti->ti_flags & TH_FIN; ++ remque(tcpiphdr2qlink(ti)); ++ m = ti->ti_mbuf; ++ ti = tcpiphdr_next(ti); ++ if (so->so_state & SS_FCANTSENDMORE) ++ m_free(m); ++ else { ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ } ++ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); ++ return (flags); ++} ++ ++/* ++ * TCP input routine, follows pages 65-76 of the ++ * protocol specification dated September, 1981 very closely. 
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. 
++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. 
++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... ++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. 
++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). ++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. 
Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. ++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? 
++ */ ++ if (so->so_snd.sb_cc) ++ (void)tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. ++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. 
++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - 
sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. ++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. 
++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! */ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. 
If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. ++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. 
Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. ++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. 
++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... */ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). 
++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ (void)tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ (void)tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. 
++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. ++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. 
++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. ++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. 
++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). ++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. 
++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. ++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ (void)tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. 
++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = cp[1]; ++ if (optlen <= 0) ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ (void)tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta. 
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. 
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000..383fe31 +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. 
However, ++ * if we have exactly 1 byte of unset data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend. 
++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. 
++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. 
++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. 
++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. 
++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. 
++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. ++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000..a1016d9 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,980 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ ++ while (slirp->tcb.so_next != &slirp->tcb) { ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. 
++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. 
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ (void)tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. 
++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ return; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. 
++ */ ++int tcp_fconnect(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("tcp_fconnect"); ++ DEBUG_ARG("so = %p", so); ++ ++ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); ++ if (ret >= 0) { ++ ret = slirp_bind_outbound(so, af); ++ if (ret < 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return (ret); ++ } ++ } ++ ++ if (ret >= 0) { ++ int opt, s = so->s; ++ struct sockaddr_storage addr; ++ ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" connect()ing"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* We don't care what port we get */ ++ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); ++ ++ /* ++ * If it's not in progress, it failed, so we just return 0, ++ * without clearing SS_NOFDREF ++ */ ++ soisfconnecting(so); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Accept the socket and connect to the local-host ++ * ++ * We have a problem. The correct thing to do would be ++ * to first connect to the local-host, and only if the ++ * connection is accepted, then do an accept() here. ++ * But, a) we need to know who's trying to connect ++ * to the socket to be able to SYN the local-host, and ++ * b) we are already connected to the foreign host by ++ * the time it gets to accept(), so... We simply accept ++ * here and SYN the local-host. 
++ */ ++void tcp_connect(struct socket *inso) ++{ ++ Slirp *slirp = inso->slirp; ++ struct socket *so; ++ struct sockaddr_storage addr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ struct tcpcb *tp; ++ int s, opt; ++ ++ DEBUG_CALL("tcp_connect"); ++ DEBUG_ARG("inso = %p", inso); ++ ++ /* ++ * If it's an SS_ACCEPTONCE socket, no need to socreate() ++ * another socket, just use the accept() socket. ++ */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* FACCEPTONCE already have a tcpcb */ ++ so = inso; ++ } else { ++ so = socreate(slirp); ++ tcp_attach(so); ++ so->lhost = inso->lhost; ++ so->so_ffamily = inso->so_ffamily; ++ } ++ ++ tcp_mss(sototcpcb(so), 0); ++ ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s < 0) { ++ tcp_close(sototcpcb(so)); /* This will sofree() as well */ ++ return; ++ } ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ so->fhost.ss = addr; ++ sotranslate_accept(so); ++ ++ /* Close the accept() socket, set right state */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* If we only accept once, close the accept() socket */ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ ++ /* Don't select it yet, even though we have an FD */ ++ /* if it's not FACCEPTONCE, it's already NOFDREF */ ++ so->so_state = SS_NOFDREF; ++ } ++ so->s = s; ++ so->so_state |= SS_INCOMING; ++ ++ so->so_iptos = tcp_tos(so); ++ tp = sototcpcb(so); ++ ++ tcp_template(tp); ++ ++ tp->t_state = TCPS_SYN_SENT; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tcp_sendseqinit(tp); ++ tcp_output(tp); ++} ++ ++/* ++ * Attach a TCPCB to a socket. 
++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. 
Because of this, ++ * we'll get the first letter, followed by the rest, so ++ * we simply scan for ORT instead of PORT... ++ * DCC doesn't have this problem because there's other stuff ++ * in the packet before the DCC command. ++ * ++ * Return 1 if the mbuf m is still valid and should be ++ * sbappend()ed ++ * ++ * NOTE: if you return 0 you MUST m_free() the mbuf! ++ */ ++int tcp_emu(struct socket *so, struct mbuf *m) ++{ ++ Slirp *slirp = so->slirp; ++ unsigned n1, n2, n3, n4, n5, n6; ++ char buff[257]; ++ uint32_t laddr; ++ unsigned lport; ++ char *bptr; ++ ++ DEBUG_CALL("tcp_emu"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ switch (so->so_emu) { ++ int x, i; ++ ++ /* TODO: IPv6 */ ++ case EMU_IDENT: ++ /* ++ * Identification protocol as per rfc-1413 ++ */ ++ ++ { ++ struct socket *tmpso; ++ struct sockaddr_in addr; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); ++ ++ if (!eol) { ++ return 1; ++ } ++ ++ *eol = '\0'; ++ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { ++ HTONS(n1); ++ HTONS(n2); ++ /* n2 is the one on our host */ ++ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; ++ tmpso = tmpso->so_next) { ++ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && ++ tmpso->so_lport == n2 && ++ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && ++ tmpso->so_fport == n1) { ++ if (getsockname(tmpso->s, (struct sockaddr *)&addr, ++ &addrlen) == 0) ++ n2 = addr.sin_port; ++ break; ++ } ++ } ++ NTOHS(n1); ++ NTOHS(n2); ++ m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); ++ } else { ++ *eol = '\r'; ++ } ++ ++ return 1; ++ } ++ ++ case EMU_FTP: /* ftp */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { ++ /* ++ * Need to emulate the PORT command ++ */ ++ x = sscanf(bptr, "ORT 
%u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, ++ &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { ++ /* ++ * Need to emulate the PASV response ++ */ ++ x = sscanf( ++ bptr, ++ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", ++ &n1, &n2, &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } ++ ++ return 1; ++ ++ case EMU_KSH: ++ /* ++ * The kshell (Kerberos rsh) and shell services both pass ++ * a local port port number to carry signals to the server ++ * and stderr to the client. It is passed at the beginning ++ * of the connection as a NUL-terminated decimal ASCII string. 
++ */ ++ so->so_emu = 0; ++ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { ++ if (m->m_data[i] < '0' || m->m_data[i] > '9') ++ return 1; /* invalid number */ ++ lport *= 10; ++ lport += m->m_data[i] - '0'; ++ } ++ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && ++ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, ++ htons(lport), SS_FACCEPTONCE)) != NULL) ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); ++ return 1; ++ ++ case EMU_IRC: ++ /* ++ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE ++ */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) ++ return 1; ++ ++ /* The %256s is for the broken mIRC */ ++ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); ++ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ 
ntohs(so->so_fport), n1, 1); ++ } ++ return 1; ++ ++ case EMU_REALAUDIO: ++ /* ++ * RealAudio emulation - JP. We must try to parse the incoming ++ * data and try to find the two characters that contain the ++ * port number. Then we redirect an udp port and replace the ++ * number with the real port we got. ++ * ++ * The 1.0 beta versions of the player are not supported ++ * any more. ++ * ++ * A typical packet for player version 1.0 (release version): ++ * ++ * 0000:50 4E 41 00 05 ++ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P ++ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH ++ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v ++ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB ++ * ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * second packet. This time we received five bytes first and ++ * then the rest. You never know how many bytes you get. ++ * ++ * A typical packet for player version 2.0 (beta): ++ * ++ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. ++ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 ++ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ ++ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas ++ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B ++ * ++ * Port number 0x1BC1 is found at offset 0x0d. ++ * ++ * This is just a horrible switch statement. Variable ra tells ++ * us where we're going. 
++ */ ++ ++ bptr = m->m_data; ++ while (bptr < m->m_data + m->m_len) { ++ uint16_t p; ++ static int ra = 0; ++ char ra_tbl[4]; ++ ++ ra_tbl[0] = 0x50; ++ ra_tbl[1] = 0x4e; ++ ra_tbl[2] = 0x41; ++ ra_tbl[3] = 0; ++ ++ switch (ra) { ++ case 0: ++ case 2: ++ case 3: ++ if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 1: ++ /* ++ * We may get 0x50 several times, ignore them ++ */ ++ if (*bptr == 0x50) { ++ ra = 1; ++ bptr++; ++ continue; ++ } else if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 4: ++ /* ++ * skip version number ++ */ ++ bptr++; ++ break; ++ ++ case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ /* ++ * The difference between versions 1.0 and ++ * 2.0 is here. For future versions of ++ * the player this may need to be modified. ++ */ ++ if (*(bptr + 1) == 0x02) ++ bptr += 8; ++ else ++ bptr += 4; ++ break; ++ ++ case 6: ++ /* This is the field containing the port ++ * number that RA-player is listening to. ++ */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; ++ if (lport < 6970) ++ lport += 256; /* don't know why */ ++ if (lport < 6970 || lport > 7170) ++ return 1; /* failed */ ++ ++ /* try to get udp port between 6970 - 7170 */ ++ for (p = 6970; p < 7071; p++) { ++ if (udp_listen(slirp, INADDR_ANY, htons(p), ++ so->so_laddr.s_addr, htons(lport), ++ SS_FACCEPTONCE)) { ++ break; ++ } ++ } ++ if (p == 7071) ++ p = 0; ++ *(uint8_t *)bptr++ = (p >> 8) & 0xff; ++ *(uint8_t *)bptr = p & 0xff; ++ ra = 0; ++ return 1; /* port redirected, we're done */ ++ break; ++ ++ default: ++ ra = 0; ++ } ++ ra++; ++ } ++ return 1; ++ ++ default: ++ /* Ooops, not emulated, won't call tcp_emu again */ ++ so->so_emu = 0; ++ return 1; ++ } ++} ++ ++/* ++ * Do misc. config of SLiRP while its running. 
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000..102023e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */ ++void tcp_slowtimo(Slirp *slirp) ++{ ++ register struct socket *ip, *ipnxt; ++ register struct tcpcb *tp; ++ register int i; ++ ++ DEBUG_CALL("tcp_slowtimo"); ++ ++ /* ++ * Search through tcb's and update active timers. ++ */ ++ ip = slirp->tcb.so_next; ++ if (ip == NULL) { ++ return; ++ } ++ for (; ip != &slirp->tcb; ip = ipnxt) { ++ ipnxt = ip->so_next; ++ tp = sototcpcb(ip); ++ if (tp == NULL) { ++ continue; ++ } ++ for (i = 0; i < TCPT_NTIMERS; i++) { ++ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { ++ tcp_timers(tp, i); ++ if (ipnxt->so_prev != ip) ++ goto tpgone; ++ } ++ } ++ tp->t_idle++; ++ if (tp->t_rtt) ++ tp->t_rtt++; ++ tpgone:; ++ } ++ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ ++ slirp->tcp_now++; /* for timestamps */ ++} ++ ++/* ++ * Cancel all timers for TCP tp. ++ */ ++void tcp_canceltimers(struct tcpcb *tp) ++{ ++ register int i; ++ ++ for (i = 0; i < TCPT_NTIMERS; i++) ++ tp->t_timer[i] = 0; ++} ++ ++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, ++ 64, 64, 64, 64, 64, 64 }; ++ ++/* ++ * TCP timer processing. ++ */ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) ++{ ++ register int rexmt; ++ ++ DEBUG_CALL("tcp_timers"); ++ ++ switch (timer) { ++ /* ++ * 2 MSL timeout in shutdown went off. If we're closed but ++ * still waiting for peer to close and connection has been idle ++ * too long, or if 2MSL time is up from TIME_WAIT, delete connection ++ * control block. Otherwise, check again in a bit. ++ */ ++ case TCPT_2MSL: ++ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) ++ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; ++ else ++ tp = tcp_close(tp); ++ break; ++ ++ /* ++ * Retransmission timer went off. Message has not ++ * been acked within retransmit interval. Back off ++ * to a longer retransmit interval and retransmit one segment. 
++ */ ++ case TCPT_REXMT: ++ ++ /* ++ * XXXXX If a packet has timed out, then remove all the queued ++ * packets for that session. ++ */ ++ ++ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { ++ /* ++ * This is a hack to suit our terminal server here at the uni of ++ * canberra since they have trouble with zeroes... It usually lets ++ * them through unharmed, but under some conditions, it'll eat the ++ * zeros. If we keep retransmitting it, it'll keep eating the ++ * zeroes, so we keep retransmitting, and eventually the connection ++ * dies... (this only happens on incoming data) ++ * ++ * So, if we were gonna drop the connection from too many ++ * retransmits, don't... instead halve the t_maxseg, which might ++ * break up the NULLs and let them through ++ * ++ * *sigh* ++ */ ++ ++ tp->t_maxseg >>= 1; ++ if (tp->t_maxseg < 32) { ++ /* ++ * We tried our best, now the connection must die! ++ */ ++ tp->t_rxtshift = TCP_MAXRXTSHIFT; ++ tp = tcp_drop(tp, tp->t_softerror); ++ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ ++ return (tp); /* XXX */ ++ } ++ ++ /* ++ * Set rxtshift to 6, which is still at the maximum ++ * backoff time ++ */ ++ tp->t_rxtshift = 6; ++ } ++ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; ++ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * If losing, let the lower level know and try for ++ * a better route. Also, if we backed off this far, ++ * our srtt estimate is probably bogus. Clobber it ++ * so we'll take the next rtt measurement as our srtt; ++ * move the current srtt into rttvar to keep the current ++ * retransmit times until then. ++ */ ++ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { ++ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); ++ tp->t_srtt = 0; ++ } ++ tp->snd_nxt = tp->snd_una; ++ /* ++ * If timing a segment in this window, stop the timer. 
++ */ ++ tp->t_rtt = 0; ++ /* ++ * Close the congestion window down to one segment ++ * (we'll open it by one segment for each ack we get). ++ * Since we probably have a window's worth of unacked ++ * data accumulated, this "slow start" keeps us from ++ * dumping all that data as back-to-back packets (which ++ * might overwhelm an intermediate gateway). ++ * ++ * There are two phases to the opening: Initially we ++ * open by one mss on each ack. This makes the window ++ * size increase exponentially with time. If the ++ * window is larger than the path can handle, this ++ * exponential growth results in dropped packet(s) ++ * almost immediately. To get more time between ++ * drops but still "push" the network to take advantage ++ * of improving conditions, we switch from exponential ++ * to linear window opening at some threshold size. ++ * For a threshold, we use half the current window ++ * size, truncated to a multiple of the mss. ++ * ++ * (the minimum cwnd that will give us exponential ++ * growth is 2 mss. We don't allow the threshold ++ * to go below this.) ++ */ ++ { ++ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ if (win < 2) ++ win = 2; ++ tp->snd_cwnd = tp->t_maxseg; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_dupacks = 0; ++ } ++ (void)tcp_output(tp); ++ break; ++ ++ /* ++ * Persistence timer into zero window. ++ * Force a byte to be output, if possible. ++ */ ++ case TCPT_PERSIST: ++ tcp_setpersist(tp); ++ tp->t_force = 1; ++ (void)tcp_output(tp); ++ tp->t_force = 0; ++ break; ++ ++ /* ++ * Keep-alive timer went off; send something ++ * or drop connection if idle for too long. 
++ */ ++ case TCPT_KEEP: ++ if (tp->t_state < TCPS_ESTABLISHED) ++ goto dropit; ++ ++ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { ++ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) ++ goto dropit; ++ /* ++ * Send a packet designed to force a response ++ * if the peer is up and reachable: ++ * either an ACK if the connection is still alive, ++ * or an RST if the peer has closed the connection ++ * due to timeout or reboot. ++ * Using sequence number tp->snd_una-1 ++ * causes the transmitted zero-length segment ++ * to lie outside the receive window; ++ * by the protocol spec, this requires the ++ * correspondent TCP to respond. ++ */ ++ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, ++ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ } else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ break; ++ ++ dropit: ++ tp = tcp_drop(tp, 0); ++ break; ++ } ++ ++ return (tp); ++} +diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h +new file mode 100644 +index 0000000..584a559 +--- /dev/null ++++ b/slirp/src/tcp_timer.h +@@ -0,0 +1,130 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPTV_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If a ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCP_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) 
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000..c8da8cb +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; /* TF_* bits defined just below */ ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC 793, Sept. 1981, section 3.2. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation. 
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000..d3df021 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, (sizeof(struct tcpiphdr) - sizeof(struct ip) - \ ++ sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000..c209145 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,462 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */
++
++#include "slirp.h"
++
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++
++static inline int tftp_session_in_use(struct tftp_session *spt)
++{
++    return (spt->slirp != NULL);
++}
++
++static inline void tftp_session_update(struct tftp_session *spt)
++{
++    spt->timestamp = curtime;
++}
++
++static void tftp_session_terminate(struct tftp_session *spt)
++{
++    if (spt->fd >= 0) {
++        close(spt->fd);
++        spt->fd = -1;
++    }
++    g_free(spt->filename);
++    spt->slirp = NULL;
++}
++
++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas,
++                                 struct tftp_t *tp)
++{
++    struct tftp_session *spt;
++    int k;
++
++    for (k = 0; k < TFTP_SESSIONS_MAX; k++) {
++        spt = &slirp->tftp_sessions[k];
++
++        if (!tftp_session_in_use(spt))
++            goto found;
++
++        /* sessions time out after 5 inactive seconds */
++        if ((int)(curtime - spt->timestamp) > 5000) {
++            tftp_session_terminate(spt);
++            goto found;
++        }
++    }
++
++    return -1;
++
++found:
++    memset(spt, 0, sizeof(*spt));
++    memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas));
++    spt->fd = -1;
++    spt->block_size = 512;
++    spt->client_port = tp->udp.uh_sport;
++    spt->slirp = slirp;
++
++    tftp_session_update(spt);
++
++    return k;
++}
++
++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas,
++                             struct tftp_t *tp)
++{
++    struct tftp_session *spt;
++    int k;
++
++    for (k = 0; k < TFTP_SESSIONS_MAX; k++) {
++        spt = &slirp->tftp_sessions[k];
++
++        if (tftp_session_in_use(spt)) {
++            if (sockaddr_equal(&spt->client_addr, srcsas)) {
++                if (spt->client_port == tp->udp.uh_sport) {
++                    return k;
++                }
++            }
++        }
++    }
++
++    return -1;
++}
++
++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr,
++                          uint8_t *buf, int len)
++{
++    int bytes_read = 0;
++
++    if (spt->fd < 0) {
++        spt->fd = open(spt->filename, O_RDONLY | O_BINARY);
++    }
++
++    if (spt->fd < 0) {
++        return -1;
++    }
++
++    if (len) {
++        /* widen before multiplying so offsets past 4 GiB do not wrap */
++        lseek(spt->fd, (off_t)block_nr * spt->block_size, SEEK_SET);
++        bytes_read = read(spt->fd, buf, len);
++    }
++
++    return bytes_read;
++}
++
++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt,
++                                          struct mbuf *m)
++{
++    struct tftp_t *tp;
++
++    memset(m->m_data, 0, m->m_size);
++
++    m->m_data += IF_MAXLINKHDR;
++    if (spt->client_addr.ss_family == AF_INET6) {
++        m->m_data += sizeof(struct ip6);
++    } else {
++        m->m_data += sizeof(struct ip);
++    }
++    tp = (void *)m->m_data;
++    m->m_data += sizeof(struct udphdr);
++
++    return tp;
++}
++
++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m,
++                            struct tftp_t *recv_tp)
++{
++    if (spt->client_addr.ss_family == AF_INET6) {
++        struct sockaddr_in6 sa6, da6;
++
++        sa6.sin6_addr = spt->slirp->vhost_addr6;
++        sa6.sin6_port = recv_tp->udp.uh_dport;
++        da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr;
++        da6.sin6_port = spt->client_port;
++
++        udp6_output(NULL, m, &sa6, &da6);
++    } else {
++        struct sockaddr_in sa4, da4;
++
++        sa4.sin_addr = spt->slirp->vhost_addr;
++        sa4.sin_port = recv_tp->udp.uh_dport;
++        da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr;
++        da4.sin_port = spt->client_port;
++
++        udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY);
++    }
++}
++
++static int tftp_send_oack(struct tftp_session *spt, const char *keys[],
++                          uint32_t values[], int nb, struct tftp_t *recv_tp)
++{
++    struct mbuf *m;
++    struct tftp_t *tp;
++    int i, n = 0;
++
++    m = m_get(spt->slirp);
++
++    if (!m)
++        return -1;
++
++    tp = tftp_prep_mbuf_data(spt, m);
++
++    tp->tp_op = htons(TFTP_OACK);
++    for (i = 0; i < nb; i++) {
++        n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]);
++        n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]);
++    }
++
++    m->m_len = G_SIZEOF_MEMBER(struct tftp_t, tp_op) + n;
++    tftp_udp_output(spt, m, recv_tp);
++
++    return 0;
++}
++
++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode,
++                            const char *msg, struct tftp_t *recv_tp)
++{
++    struct mbuf *m;
++    struct tftp_t *tp;
++
++    DEBUG_TFTP("tftp error msg: %s", msg);
++
++    m = m_get(spt->slirp);
++
++    if (!m) {
++        goto out;
++    }
++
++    tp = tftp_prep_mbuf_data(spt, m);
++
++    tp->tp_op = htons(TFTP_ERROR);
++    tp->x.tp_error.tp_error_code = htons(errorcode);
++    slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg),
++                  msg);
++
++    m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 +
++               strlen(msg) - sizeof(struct udphdr);
++    tftp_udp_output(spt, m, recv_tp);
++
++out:
++    tftp_session_terminate(spt);
++}
++
++static void tftp_send_next_block(struct tftp_session *spt,
++                                 struct tftp_t *recv_tp)
++{
++    struct mbuf *m;
++    struct tftp_t *tp;
++    int nobytes;
++
++    m = m_get(spt->slirp);
++
++    if (!m) {
++        return;
++    }
++
++    tp = tftp_prep_mbuf_data(spt, m);
++
++    tp->tp_op = htons(TFTP_DATA);
++    tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff);
++
++    nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf,
++                             spt->block_size);
++
++    if (nobytes < 0) {
++        m_free(m);
++
++        /* send "file not found" error back; tp pointed into the mbuf
++         * just freed, so the reply port must come from the request */
++        tftp_send_error(spt, 1, "File not found", recv_tp);
++
++        return;
++    }
++
++    m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) -
++               sizeof(struct udphdr);
++    tftp_udp_output(spt, m, recv_tp);
++
++    if (nobytes == spt->block_size) {
++        tftp_session_update(spt);
++    } else {
++        tftp_session_terminate(spt);
++    }
++
++    spt->block_nr++;
++}
++
++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas,
++                            struct tftp_t *tp, int pktlen)
++{
++    struct tftp_session *spt;
++    int s, k;
++    size_t prefix_len;
++    char *req_fname;
++    const char *option_name[2];
++    uint32_t option_value[2];
++    int nb_options = 0;
++
++    /* check if a session already exists and if so terminate it */
++    s = tftp_session_find(slirp, srcsas, tp);
++    if (s >= 0) {
++        tftp_session_terminate(&slirp->tftp_sessions[s]);
++    }
++
++    s = tftp_session_allocate(slirp, srcsas, tp);
++
++    if (s < 0) {
++        return;
++    }
++
++    spt = &slirp->tftp_sessions[s];
++
++    /* unspecified prefix means service disabled */
++    if (!slirp->tftp_prefix) {
++        tftp_send_error(spt, 2, "Access violation", tp);
++        return;
++    }
++
++    /* skip header fields */
++    k = 0;
++    pktlen -= offsetof(struct tftp_t, x.tp_buf);
++
++    /* prepend tftp_prefix */
++    prefix_len = strlen(slirp->tftp_prefix);
++    spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2);
++    memcpy(spt->filename, slirp->tftp_prefix, prefix_len);
++    spt->filename[prefix_len] = '/';
++
++    /* get name */
++    req_fname = spt->filename + prefix_len + 1;
++
++    while (1) {
++        if (k >= TFTP_FILENAME_MAX || k >= pktlen) {
++            tftp_send_error(spt, 2, "Access violation", tp);
++            return;
++        }
++        req_fname[k] = tp->x.tp_buf[k];
++        if (req_fname[k++] == '\0') {
++            break;
++        }
++    }
++
++    DEBUG_TFTP("tftp rrq file: %s", req_fname);
++
++    /* check mode */
++    if ((pktlen - k) < 6) {
++        tftp_send_error(spt, 2, "Access violation", tp);
++        return;
++    }
++
++    if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) {
++        tftp_send_error(spt, 4, "Unsupported transfer mode", tp);
++        return;
++    }
++
++    k += 6; /* skipping octet */
++
++    /* do sanity checks on the filename */
++    if (
++#ifdef G_OS_WIN32
++        strstr(req_fname, "..\\") ||
++        req_fname[strlen(req_fname) - 1] == '\\' ||
++#endif
++        strstr(req_fname, "../") ||
++        req_fname[strlen(req_fname) - 1] == '/') {
++        tftp_send_error(spt, 2, "Access violation", tp);
++        return;
++    }
++
++    /* check if the file exists */
++    if (tftp_read_data(spt, 0, NULL, 0) < 0) {
++        tftp_send_error(spt, 1, "File not found", tp);
++        return;
++    }
++
++    if (tp->x.tp_buf[pktlen - 1] != 0) {
++        tftp_send_error(spt, 2, "Access violation", tp);
++        return;
++    }
++
++    while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) {
++        const char *key, *value;
++
++        key = &tp->x.tp_buf[k];
++        k += strlen(key) + 1;
++
++        if (k >= pktlen) {
++            tftp_send_error(spt, 2, "Access violation", tp);
++            return;
++        }
++
++        value = &tp->x.tp_buf[k];
++        k += strlen(value) + 1;
++
++        if (strcasecmp(key, "tsize") == 0) {
++            int tsize = atoi(value);
++            struct stat stat_p;
++
++            if (tsize == 0) {
++                if (stat(spt->filename, &stat_p) == 0)
++                    tsize = stat_p.st_size;
++                else {
++                    tftp_send_error(spt, 1, "File not found", tp);
++                    return;
++                }
++            }
++
++            option_name[nb_options] = "tsize";
++            option_value[nb_options] = tsize;
++            nb_options++;
++        } else if (strcasecmp(key, "blksize") == 0) {
++            int blksize = atoi(value);
++
++            /* Accept blksize up to our maximum size */
++            if (blksize > 0) {
++                spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX);
++                option_name[nb_options] = "blksize";
++                option_value[nb_options] = spt->block_size;
++                nb_options++;
++            }
++        }
++    }
++
++    if (nb_options > 0) {
++        assert(nb_options <= G_N_ELEMENTS(option_name));
++        tftp_send_oack(spt, option_name, option_value, nb_options, tp);
++        return;
++    }
++
++    spt->block_nr = 0;
++    tftp_send_next_block(spt, tp);
++}
++
++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas,
++                            struct tftp_t *tp, int pktlen)
++{
++    int s;
++
++    s = tftp_session_find(slirp, srcsas, tp);
++
++    if (s < 0) {
++        return;
++    }
++
++    tftp_send_next_block(&slirp->tftp_sessions[s], tp);
++}
++
++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas,
++                              struct tftp_t *tp, int pktlen)
++{
++    int s;
++
++    s = tftp_session_find(slirp, srcsas, tp);
++
++    if (s < 0) {
++        return;
++    }
++
++    tftp_session_terminate(&slirp->tftp_sessions[s]);
++}
++
++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m)
++{
++    struct tftp_t *tp = (struct tftp_t *)m->m_data;
++    /* drop runts: tp_op must lie inside the received datagram */
++    if (m->m_len < (int)offsetof(struct tftp_t, x.tp_buf))
++        return;
++    switch (ntohs(tp->tp_op)) {
++    case TFTP_RRQ:
++        tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len);
++        break;
++    case TFTP_ACK:
++        tftp_handle_ack(m->slirp, srcsas, tp, m->m_len);
++        break;
++    case TFTP_ERROR:
++        tftp_handle_error(m->slirp, srcsas, tp, m->m_len);
++        break;
++    }
++}
+diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h
+new file mode 100644
+index 0000000..c47bb43
+--- /dev/null
++++ b/slirp/src/tftp.h
+@@ -0,0 +1,52 @@
++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftp_t { ++ struct udphdr udp; ++ uint16_t tp_op; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} __attribute__((packed)); ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000..6bde20f +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,361 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */
++
++#include "slirp.h"
++#include "ip_icmp.h"
++
++static uint8_t udp_tos(struct socket *so);
++
++void udp_init(Slirp *slirp)
++{
++    slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb;
++    slirp->udp_last_so = &slirp->udb;
++}
++
++void udp_cleanup(Slirp *slirp)
++{
++    while (slirp->udb.so_next != &slirp->udb) {
++        udp_detach(slirp->udb.so_next);
++    }
++}
++
++/* m->m_data points at ip packet header
++ * m->m_len length ip packet
++ * ip->ip_len length data (IPDU)
++ */
++void udp_input(register struct mbuf *m, int iphlen)
++{
++    Slirp *slirp = m->slirp;
++    register struct ip *ip;
++    register struct udphdr *uh;
++    int len;
++    struct ip save_ip;
++    struct socket *so;
++    struct sockaddr_storage lhost;
++    struct sockaddr_in *lhost4;
++
++    DEBUG_CALL("udp_input");
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("iphlen = %d", iphlen);
++
++    /*
++     * Strip IP options, if any; should skip this,
++     * make available to user, and use on returned packets,
++     * but we don't yet have a way to check the checksum
++     * with options still present.
++     */
++    if (iphlen > sizeof(struct ip)) {
++        ip_stripoptions(m, (struct mbuf *)0);
++        iphlen = sizeof(struct ip);
++    }
++
++    /* Get IP and UDP header together in first mbuf; drop runts. */
++    ip = mtod(m, struct ip *);
++    uh = (struct udphdr *)((char *)ip + iphlen);
++    if (m->m_len < iphlen + (int)sizeof(struct udphdr))
++        goto bad;
++
++    /*
++     * Make mbuf data length reflect UDP length.
++     * If not enough data to reflect UDP length, drop.
++     */
++    len = ntohs((uint16_t)uh->uh_ulen);
++
++    if (ip->ip_len != len) {
++        if (len > ip->ip_len) {
++            goto bad;
++        }
++        m_adj(m, len - ip->ip_len);
++        ip->ip_len = len;
++    }
++
++    /*
++     * Save a copy of the IP header in case we want to restore it
++     * for sending an ICMP error message in response.
++     */
++    save_ip = *ip;
++    save_ip.ip_len += iphlen; /* tcp_input subtracts this */
++
++    /*
++     * Checksum extended UDP header and data.
++     */
++    if (uh->uh_sum) {
++        memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr));
++        ((struct ipovly *)ip)->ih_x1 = 0;
++        ((struct ipovly *)ip)->ih_len = uh->uh_ulen;
++        if (cksum(m, len + sizeof(struct ip))) {
++            goto bad;
++        }
++    }
++
++    lhost.ss_family = AF_INET;
++    lhost4 = (struct sockaddr_in *)&lhost;
++    lhost4->sin_addr = ip->ip_src;
++    lhost4->sin_port = uh->uh_sport;
++
++    /*
++     * handle DHCP/BOOTP
++     */
++    if (ntohs(uh->uh_dport) == BOOTP_SERVER &&
++        (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr ||
++         ip->ip_dst.s_addr == 0xffffffff)) {
++        bootp_input(m);
++        goto bad;
++    }
++
++    /*
++     * handle TFTP
++     */
++    if (ntohs(uh->uh_dport) == TFTP_SERVER &&
++        ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) {
++        m->m_data += iphlen;
++        m->m_len -= iphlen;
++        tftp_input(&lhost, m);
++        m->m_data -= iphlen;
++        m->m_len += iphlen;
++        goto bad;
++    }
++
++    if (slirp->restricted) {
++        goto bad;
++    }
++
++    /*
++     * Locate pcb for datagram.
++     */
++    so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL);
++
++    if (so == NULL) {
++        /*
++         * If there's no socket for this packet,
++         * create one
++         */
++        so = socreate(slirp);
++        if (udp_attach(so, AF_INET) == -1) {
++            DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno));
++            sofree(so);
++            goto bad;
++        }
++
++        /*
++         * Setup fields
++         */
++        so->so_lfamily = AF_INET;
++        so->so_laddr = ip->ip_src;
++        so->so_lport = uh->uh_sport;
++
++        if ((so->so_iptos = udp_tos(so)) == 0)
++            so->so_iptos = ip->ip_tos;
++
++        /*
++         * XXXXX Here, check if it's in udpexec_list,
++         * and if it is, do the fork_exec() etc.
++         */
++    }
++
++    so->so_ffamily = AF_INET;
++    so->so_faddr = ip->ip_dst; /* XXX */
++    so->so_fport = uh->uh_dport; /* XXX */
++
++    iphlen += sizeof(struct udphdr);
++    m->m_len -= iphlen;
++    m->m_data += iphlen;
++
++    /*
++     * Now we sendto() the packet.
++     */
++    if (sosendto(so, m) == -1) {
++        m->m_len += iphlen;
++        m->m_data -= iphlen;
++        *ip = save_ip;
++        DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno));
++        icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno));
++        goto bad;
++    }
++
++    m_free(so->so_m); /* used for ICMP if error on sorecvfrom */
++
++    /* restore the orig mbuf packet */
++    m->m_len += iphlen;
++    m->m_data -= iphlen;
++    *ip = save_ip;
++    so->so_m = m; /* ICMP backup */
++
++    return;
++bad:
++    m_free(m);
++}
++
++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr,
++               struct sockaddr_in *daddr, int iptos)
++{
++    register struct udpiphdr *ui;
++    int error = 0;
++
++    DEBUG_CALL("udp_output");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr));
++    DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr));
++
++    /*
++     * Adjust for header
++     */
++    m->m_data -= sizeof(struct udpiphdr);
++    m->m_len += sizeof(struct udpiphdr);
++
++    /*
++     * Fill in mbuf with extended UDP header
++     * and addresses and length put into network format.
++     */
++    ui = mtod(m, struct udpiphdr *);
++    memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr));
++    ui->ui_x1 = 0;
++    ui->ui_pr = IPPROTO_UDP;
++    ui->ui_len = htons(m->m_len - sizeof(struct ip));
++    /* XXXXX Check for from-one-location sockets, or from-any-location sockets
++     */
++    ui->ui_src = saddr->sin_addr;
++    ui->ui_dst = daddr->sin_addr;
++    ui->ui_sport = saddr->sin_port;
++    ui->ui_dport = daddr->sin_port;
++    ui->ui_ulen = ui->ui_len;
++
++    /*
++     * Stuff checksum and output datagram.
++     */
++    ui->ui_sum = 0;
++    if ((ui->ui_sum = cksum(m, m->m_len)) == 0)
++        ui->ui_sum = 0xffff;
++    ((struct ip *)ui)->ip_len = m->m_len;
++
++    ((struct ip *)ui)->ip_ttl = IPDEFTTL;
++    ((struct ip *)ui)->ip_tos = iptos;
++
++    error = ip_output(so, m);
++
++    return (error);
++}
++
++int udp_attach(struct socket *so, unsigned short af)
++{
++    so->s = slirp_socket(af, SOCK_DGRAM, 0);
++    if (so->s != -1) {
++        if (slirp_bind_outbound(so, af) != 0) {
++            // bind failed - close socket
++            closesocket(so->s);
++            so->s = -1;
++            return -1;
++        }
++        so->so_expire = curtime + SO_EXPIRE;
++        insque(so, &so->slirp->udb);
++    }
++    return (so->s);
++}
++
++void udp_detach(struct socket *so)
++{
++    so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque);
++    closesocket(so->s);
++    sofree(so);
++}
++
++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */
++                                       { 0, 0, 0, 0 } };
++
++static uint8_t udp_tos(struct socket *so)
++{
++    int i = 0;
++
++    while (udptos[i].tos) {
++        if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) ||
++            (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) {
++            if (so->slirp->enable_emu)
++                so->so_emu = udptos[i].emu;
++            return udptos[i].tos;
++        }
++        i++;
++    }
++
++    return 0;
++}
++
++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport,
++                          uint32_t laddr, unsigned lport, int flags)
++{
++    /* TODO: IPv6 */
++    struct sockaddr_in addr;
++    struct socket *so;
++    socklen_t addrlen = sizeof(struct sockaddr_in);
++
++    so = socreate(slirp);
++    so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0);
++    if (so->s < 0) {
++        sofree(so);
++        return NULL;
++    }
++    so->so_expire = curtime + SO_EXPIRE;
++    insque(so, &slirp->udb);
++
++    addr.sin_family = AF_INET;
++    addr.sin_addr.s_addr = haddr;
++    addr.sin_port = hport;
++
++    if (bind(so->s, (struct sockaddr *)&addr, addrlen) < 0) {
++        udp_detach(so);
++        return NULL;
++    }
++    slirp_socket_set_fast_reuse(so->s);
++
++    getsockname(so->s, (struct sockaddr *)&addr, &addrlen);
++
so->fhost.sin = addr; ++ sotranslate_accept(so); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; ++ so->so_laddr.s_addr = laddr; ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000..c3b83fd +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. ++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. 
++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000..6f9486b +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,173 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ struct ip6 *ip, save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod(m, struct udphdr *); ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ 
++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Now we sendto() the packet. 
++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ ++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000..570c53f +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,428 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ 
case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return 
ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int 
flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000..d67b3d0 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,189 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int 
slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_nodelay(int 
fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000..93e0be9 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000..68cc172 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int 
put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. 
++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ 
n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ 
g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if 
(field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000..94c6a4b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. 
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
*/ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. 
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +1.8.3.1 + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index cde66a1..174bcc9 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 +From 4b7f57db366243eeafc0528d3cff4fa6967e7522 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -10,10 +10,8 @@ several issues are fixed in QEMU tree: - As we use qemu-kvm as name in all places, this is updated to be consistent - Man page renamed from qemu to qemu-kvm - man page is installed using make 
install so we have to fix it in qemu tree - - Use "/share/qemu-kvm" as SHARE_SUFFIX - - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-4.1.0-18.el8 +This rebase includes changes up to qemu-kvm-4.2.0-20.el8 Rebase notes (3.1.0): - added new configure options @@ -49,6 +47,44 @@ Rebase notes (4.2.0): - Removed spapr-rtas.bin (upstream) - Require newer SLOF (20191022) +Rebase notes (weekly-200115): +- Added index.html (upstream) + +Rebase notes (weekly-200122): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file + +Rebase notes (weekly-200129): +- Ship docs/qemu-kvm/system help files (added upstream) + +Rebase notes (weekly-200212): +- Added virtiofsd.1 (upstream) + +Rebase notes (weekly-200219): +- Use out-of-tree build + +Rebase notes (weekly-200226): +- added tools documentation (upstream) + +Rebase notes (weekly-200304): +- Update local build + +Rebase notes (weekly-200311): +- Add docs/qemu-kvm/user help files (added upstream) +- Removing installed qemu-storage-daemon (added upstream) + +Rebase notes (weekly-200318): +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) + +Rebase notes (weekly-200325): +- Disable iotests (moved from Enable make check commit) + +Rebase notes (5.0.0 rc2): +- Added missing configure options + +Rebase notes (5.0.0 rc3): +- Reorder configure options + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -71,30 +107,35 @@ Merged patches (4.2.0): - 69e1fb2 enable virgla - d4f6115 enable virgl, for real this time ... -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200205): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend + +Conflicts: + gdbstub.c --- - .gitignore | 1 + - Makefile | 3 +- - configure | 1 + - os-posix.c | 2 +- - redhat/Makefile | 82 + - redhat/Makefile.common | 51 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ - redhat/scripts/process-patches.sh | 7 +- - tests/Makefile.include | 2 +- - ui/vnc.c | 2 +- - 11 files changed, 2615 insertions(+), 9 deletions(-) + .gitignore | 1 + + Makefile | 3 +- + configure | 1 + + redhat/Makefile | 88 ++ + redhat/Makefile.common | 51 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 2820 +++++++++++++++++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 2 +- + redhat/scripts/process-patches.sh | 7 +- + tests/check-block.sh | 2 + + ui/vnc.c | 2 +- + 11 files changed, 3008 insertions(+), 8 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b437a346d7..086727dbb9 100644 +index 34275f5..aee2e8e 100644 --- a/Makefile +++ b/Makefile -@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -548,6 +548,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -102,20 +143,20 @@ index b437a346d7..086727dbb9 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs +@@ -883,7 +884,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" -- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" -+ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" +- $(INSTALL_DATA) 
$(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1" ++ $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 6099be1d84..16564f8ccc 100755 +index 23b5e93..1b40d7e 100755 --- a/configure +++ b/configure -@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then +@@ -2538,6 +2538,7 @@ if test "$seccomp" != "no" ; then seccomp="no" fi fi @@ -123,37 +164,24 @@ index 6099be1d84..16564f8ccc 100755 ########################################## # xen probe -diff --git a/os-posix.c b/os-posix.c -index 86cffd2c7d..1c9f86768d 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) - /* Find a likely location for support files using the location of the binary. - For installed binaries this will be "$bindir/../share/qemu". When - running from the build tree this will be "$bindir/../pc-bios". 
*/ --#define SHARE_SUFFIX "/share/qemu" -+#define SHARE_SUFFIX "/share/qemu-kvm" - #define BUILD_SUFFIX "/pc-bios" - char *os_find_datadir(void) - { -diff --git a/tests/Makefile.include b/tests/Makefile.include -index 8566f5f119..b483790cf3 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) - check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi - check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) - check-block: $(patsubst %,check-%, $(check-block-y)) --check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree -+check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree - check-clean: - rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) - rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) +diff --git a/tests/check-block.sh b/tests/check-block.sh +index ad320c2..1f26083 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -43,6 +43,8 @@ if ! 
(sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 87b8045afe..ecf6276f5b 100644 +index 1d7138a..1fc55b7 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3972,7 +3972,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -163,5 +191,5 @@ index 87b8045afe..ecf6276f5b 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.21.0 +1.8.3.1 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index b14bb1b..ff90dbb 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 +From db33b6f22ff44edfb3ca54ef7bf051ccfc53e479 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -39,6 +39,12 @@ Rebase notes (4.2.0-rc3): - Disabled ccid-card-emulated (patch 92566) - Disabled vfio-pci-igd-lpc-bridge (patch 92565) +Rebase notes (weekly-200205): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) + +Rebase notes (weekly-200226): +- Added CONFIG_NVDIMM + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -57,82 +63,72 @@ Merged patches (4.1.0): Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200108): +- 4543a3c i386: Remove cpu64-rhel6 CPU model + +Conflicts: + target/arm/cpu.c --- - Makefile.objs | 4 +- backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++ - default-configs/aarch64-softmmu.mak | 10 ++- - default-configs/ppc64-rh-devices.mak | 32 ++++++++ - default-configs/ppc64-softmmu.mak | 8 +- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 ++++ + default-configs/aarch64-rh-devices.mak | 21 +++++++ + default-configs/aarch64-softmmu.mak | 10 ++-- + default-configs/ppc64-rh-devices.mak | 34 +++++++++++ + default-configs/ppc64-softmmu.mak | 10 ++-- + default-configs/rh-virtio.mak | 10 ++++ + default-configs/s390x-rh-devices.mak | 15 +++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ - hw/bt/Makefile.objs | 4 +- + hw/block/fdc.c | 10 ++++ + hw/bt/Makefile.objs | 3 + hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + - hw/pci-host/i440fx.c | 4 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/Makefile.objs | 4 +- - hw/vfio/pci-quirks.c | 9 +++ hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +- + qemu-options.hx | 4 -- redhat/qemu-kvm.spec.template | 5 +- - target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++-- - target/ppc/cpu-models.c | 10 +++ + softmmu/vl.c | 2 +- + target/arm/cpu.c | 3 + + target/i386/cpu.c | 17 +++--- + target/ppc/cpu-models.c | 10 ++++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ + target/s390x/kvm.c | 8 +++ util/memfd.c | 2 +- - vl.c | 8 +- - 35 files changed, 317 insertions(+), 41 deletions(-) + 32 files changed, 281 insertions(+), 36 deletions(-) create mode 100644 
default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak create mode 100644 default-configs/s390x-rh-devices.mak create mode 100644 default-configs/x86_64-rh-devices.mak + create mode 100644 hw/bt/Makefile.objs -diff --git a/Makefile.objs b/Makefile.objs -index 11ba1a36bd..fcf63e1096 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -65,8 +65,8 @@ common-obj-y += replay/ - - common-obj-y += ui/ - common-obj-m += ui/ --common-obj-y += bt-host.o bt-vhci.o --bt-host.o-cflags := $(BLUEZ_CFLAGS) -+#common-obj-y += bt-host.o bt-vhci.o -+#bt-host.o-cflags := $(BLUEZ_CFLAGS) - - common-obj-y += dma-helpers.o - common-obj-y += vl.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f0691116e8..f328d404bf 100644 +index 28a847c..0eda216 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs -@@ -16,4 +16,5 @@ endif +@@ -16,7 +16,8 @@ endif common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o -common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +# RHEL: disable memfd +# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o + + common-obj-$(CONFIG_GIO) += dbus-vmstate.o + dbus-vmstate.o-cflags = $(GIO_CFLAGS) diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..a1ed641174 +index 0000000..0d756a2 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,20 @@ +@@ -0,0 +1,21 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -153,8 +149,9 @@ index 0000000000..a1ed641174 +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y ++CONFIG_NVDIMM=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 +index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -174,17 +171,19 @@ index 958b1e08e4..8f6867d48a 100644 
+include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..35f2106d06 +index 0000000..ecbe53f --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak -@@ -0,0 +1,32 @@ +@@ -0,0 +1,34 @@ +include rh-virtio.mak + +CONFIG_DIMM=y +CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y +CONFIG_PCI=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y +CONFIG_PSERIES=y +CONFIG_SCSI=y +CONFIG_SPAPR_VSCSI=y @@ -211,10 +210,10 @@ index 0000000000..35f2106d06 +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca52665d9..fec354f327 100644 +index ae0841f..040e557 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak -@@ -1,10 +1,12 @@ +@@ -1,11 +1,13 @@ # Default configuration for ppc64-softmmu # Include all 32-bit boards @@ -227,12 +226,14 @@ index cca52665d9..fec354f327 100644 # For pSeries -CONFIG_PSERIES=y +-CONFIG_NVDIMM=y +#CONFIG_PSERIES=y ++#CONFIG_NVDIMM=y + +include ppc64-rh-devices.mak diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000000..94ede1b5f6 +index 0000000..94ede1b --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -248,7 +249,7 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000..c3c73fe --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -268,7 +269,7 @@ index 0000000000..c3c73fe752 +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 +index f2287a1..3e2e388 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -281,7 +282,7 
@@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d59b6d9bb5 +index 0000000..d59b6d9 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -386,7 +387,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 +index 64b2ee2..b5de7e5 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -397,10 +398,10 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2034dd749e..ab203ad448 100644 +index 336cace..ae86900 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -412,10 +413,10 @@ index 2034dd749e..ab203ad448 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index fe749f65fd..2aa1a9efdd 100644 +index 534a6a1..bd62442 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs -@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o +@@ -28,7 +28,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o obj-$(CONFIG_ZYNQ) += xilinx_zynq.o obj-$(CONFIG_SABRELITE) += sabrelite.o @@ -425,7 +426,7 @@ index fe749f65fd..2aa1a9efdd 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ac5d31e8c1..e925bac002 100644 +index 9628cc1..37989fe 100644 --- a/hw/block/fdc.c +++ 
b/hw/block/fdc.c @@ -46,6 +46,8 @@ @@ -437,7 +438,7 @@ index ac5d31e8c1..e925bac002 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2613,6 +2615,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -451,19 +452,18 @@ index ac5d31e8c1..e925bac002 100644 + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - } + return; diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2e8a..e678e9ee3c 100644 ---- a/hw/bt/Makefile.objs +new file mode 100644 +index 0000000..e678e9e +--- /dev/null +++ b/hw/bt/Makefile.objs -@@ -1,3 +1,3 @@ --common-obj-y += core.o l2cap.o sdp.o hci.o hid.o --common-obj-y += hci-csr.o +@@ -0,0 +1,3 @@ +#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +#common-obj-y += hci-csr.o - ++ diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..1601ea93c7 100644 +index 8db9e8a..1601ea9 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -476,7 +476,7 @@ index 8db9e8a7b3..1601ea93c7 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index f2182e3bef..3d0cda1b52 100644 +index 77a7d62..68c793e 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -492,10 +492,10 @@ index f2182e3bef..3d0cda1b52 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index cd283e53b4..93afa26fda 100644 +index 1f29731..cac9e40 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice 
*dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -506,10 +506,10 @@ index cd283e53b4..93afa26fda 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index db313dd3b1..e14858ca64 100644 +index 3b2de4c..980c35e 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -221,7 +221,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -519,7 +519,7 @@ index db313dd3b1..e14858ca64 100644 } static const TypeInfo piix3_ide_info = { -@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -250,6 +251,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -529,10 +529,10 @@ index db313dd3b1..e14858ca64 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index f0acfd86f7..390eb6579c 100644 +index 60a4130..b2f1f91 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -542,10 +542,10 @@ index f0acfd86f7..390eb6579c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a73f8d404e..fc73fdd6fa 100644 +index 2a69eee..af3ec17 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { +@@ -1797,6 +1797,7 @@ static 
const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -553,7 +553,7 @@ index a73f8d404e..fc73fdd6fa 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { +@@ -1809,6 +1810,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -561,41 +561,11 @@ index a73f8d404e..fc73fdd6fa 100644 }; static void e1000_register_types(void) -diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c -index f27131102d..17f10efae2 100644 ---- a/hw/pci-host/i440fx.c -+++ b/hw/pci-host/i440fx.c -@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { - }, - }; - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* IGD Passthrough Host Bridge. */ - typedef struct { - uint8_t offset; -@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { - .instance_size = sizeof(PCII440FXState), - .class_init = igd_passthrough_i440fx_class_init, - }; -+#endif - - static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, - PCIBus *rootbus) -@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { - static void i440fx_register_types(void) - { - type_register_static(&i440fx_info); -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - type_register_static(&igd_passthrough_i440fx_info); -+#endif - type_register_static(&i440fx_pcihost_info); - } - diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8339c4c0f8..301cd7b4e4 100644 +index ac1c109..542c19e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -399,10 +399,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -609,10 +579,10 @@ index 8339c4c0f8..301cd7b4e4 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), 
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index 303ac084a0..700a91886e 100644 +index 66835e5..1b03645 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs -@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o +@@ -29,7 +29,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o ifeq ($(CONFIG_USB_SMARTCARD),y) common-obj-y += dev-smartcard-reader.o common-obj-$(CONFIG_SMARTCARD) += smartcard.mo @@ -623,76 +593,11 @@ index 303ac084a0..700a91886e 100644 smartcard.mo-cflags := $(SMARTCARD_CFLAGS) smartcard.mo-libs := $(SMARTCARD_LIBS) endif -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 136f3a9ad6..4505ffe48a 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) - trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * Intel IGD support - * -@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) - - return 8; /* Assume newer is compatible */ - } -+#endif - - typedef struct VFIOIGDQuirk { - struct VFIOPCIDevice *vdev; -@@ -1311,6 +1313,7 @@ typedef struct { - uint8_t len; - } IGDHostInfo; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static const IGDHostInfo igd_host_bridge_infos[] = { - {PCI_REVISION_ID, 2}, - {PCI_SUBSYSTEM_VENDOR_ID, 2}, -@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { - .write = vfio_igd_quirk_index_write, - .endianness = DEVICE_LITTLE_ENDIAN, - }; -+#endif - - static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - struct vfio_region_info *rom = NULL, *opregion = NULL, - *host = NULL, *lpc = NULL; - VFIOQuirk *quirk; -@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - uint32_t gmch; - uint16_t cmd_orig, cmd; - Error *err = NULL; 
-+#endif - - /* - * This must be an Intel VGA device at address 00:02.0 for us to even -@@ -1585,6 +1591,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - return; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - /* - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we - * can stuff host values into, so if there's already one there and it's not -@@ -1809,6 +1817,7 @@ out: - g_free(opregion); - g_free(host); - g_free(lpc); -+#endif - } - - /* diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d40b396f2..c8534d3035 100644 +index 5e75a95..e265d77 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3222,6 +3222,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -700,7 +605,7 @@ index 2d40b396f2..c8534d3035 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3241,11 +3242,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_nohotplug_dev_class_init, }; @@ -717,57 +622,59 @@ index 2d40b396f2..c8534d3035 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 65c9473b73..fc17aca631 100644 +index 292d4e7..1df25ae 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2111,11 +2111,6 @@ ETEXI +@@ -2239,10 +2239,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) --STEXI --@item -no-hpet --@findex -no-hpet --Disable HPET support. --ETEXI +-SRST +-``-no-hpet`` +- Disable HPET support. 
+-ERST DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -3125,6 +3120,7 @@ STEXI - ETEXI - DEFHEADING() - -+#if 0 - DEFHEADING(Bluetooth(R) options:) - STEXI - @table @option -@@ -3203,6 +3199,7 @@ STEXI - @end table - ETEXI - DEFHEADING() -+#endif - - #ifdef CONFIG_TPM - DEFHEADING(TPM device options:) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index afd2615..00f7604 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -144,7 +144,7 @@ static Chardev **serial_hds; + Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + int win2k_install_hack = 0; + int singlestep = 0; +-int no_hpet = 0; ++int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ + int fd_bootchk = 1; + static int no_reboot; + int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7a4ac9339b..3788fc3c4a 100644 +index 5d64adf..f1d18b8 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) - type_register_static(&idau_interface_type_info); +@@ -2904,6 +2904,9 @@ static void arm_cpu_register_types(void) while (info->name) { -- cpu_register(info); + arm_cpu_register(info); + /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(info->name, "cortex-a15")) -+ cpu_register(info); ++ if (!strcmp(info->name, "cortex-a15-arm-cpu")) ++ arm_cpu_register(info); info++; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 69f518a21a..1b7880ae3a 100644 +index 9c256ab..26a8584 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1801,20 +1801,21 @@ static CPUCaches epyc_rome_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all machine-types */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, .family = 
6, .model = 6, .stepping = 3, @@ -790,34 +697,8 @@ index 69f518a21a..1b7880ae3a 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -2128,6 +2128,25 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, -+ { -+ .name = "cpu64-rhel6", -+ .level = 4, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 6, -+ .model = 13, -+ .stepping = 3, -+ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -+ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -+ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -+ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -+ CPUID_PSE | CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -+ .xlevel = 0x8000000A, -+ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -+ }, - { - .name = "Conroe", - .level = 10, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e9b9..1bbf378c18 100644 +index 4ad1686..16b2185 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -847,7 +728,7 @@ index 086548e9b9..1bbf378c18 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -780,6 +784,7 @@ +@@ -782,6 +786,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -855,7 +736,7 @@ index 086548e9b9..1bbf378c18 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -940,12 +945,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -871,15 +752,15 @@ 
index 086548e9b9..1bbf378c18 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { - { "power9", "power9_v2.0" }, +@@ -955,6 +963,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v1.0" }, #endif +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -962,5 +971,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, @@ -887,7 +768,7 @@ index 086548e9b9..1bbf378c18 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7e92fb2e15..be718220d7 100644 +index 7c32180..88bf4a9 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -901,10 +782,10 @@ index 7e92fb2e15..be718220d7 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0c9d14b4b1..a02d569537 100644 +index 69881a0..9802878 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -920,7 +801,7 @@ index 0c9d14b4b1..a02d569537 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e0be..3303ec9da4 100644 +index 4a3c07e..3303ec9 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) @@ -932,63 +813,6 @@ index 4a3c07e0be..3303ec9da4 100644 int mfd = memfd_create("test", flags | MFD_CLOEXEC); if (mfd >= 
0) { -diff --git a/vl.c b/vl.c -index 6a65a64bfd..668a34577e 100644 ---- a/vl.c -+++ b/vl.c -@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; - int win2k_install_hack = 0; - int singlestep = 0; - int acpi_enabled = 1; --int no_hpet = 0; -+int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ - int fd_bootchk = 1; - static int no_reboot; - int no_shutdown = 0; -@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) - } - } - -+#if 0 // Disabled for Red Hat Enterprise Linux - /***********************************************************/ - /* Bluetooth support */ - static int nb_hcis; -@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) - error_report("bad bluetooth parameter '%s'", opt); - return 1; - } -+#endif - - static int parse_name(void *opaque, QemuOpts *opts, Error **errp) - { -@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) - } - break; - #endif -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - case QEMU_OPTION_bt: - warn_report("The bluetooth subsystem is deprecated and will " - "be removed soon. 
If the bluetooth subsystem is " -@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) - "qemu-devel@nongnu.org with your usecase."); - add_device_config(DEV_BT, optarg); - break; -+#endif - case QEMU_OPTION_audio_help: - audio_legacy_help(); - exit (0); -@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) - - tpm_init(); - -+#if 0 // Disabled for Red Hat Enterprise Linux - /* init the bluetooth world */ - if (foreach_device_config(DEV_BT, bt_parse)) - exit(1); -+#endif - - if (!xen_enabled()) { - /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.21.0 +1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 4ae3966..a302d2c 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 +From 799c934a1ec957ae2e163f367f5f7550949178da Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -33,33 +33,42 @@ Merged patches (4.2.0): - ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional - compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200318): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (weekly-200506): +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw --- - hw/acpi/ich9.c | 16 ++++ - hw/acpi/piix4.c | 5 +- - hw/char/serial.c | 16 ++++ - hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 +++++ - hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 6 ++ - hw/smbios/smbios.c | 1 + - hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 24 ++++++ - include/hw/usb.h | 4 + - migration/migration.c | 2 + - migration/migration.h | 5 ++ - 18 files changed, 301 insertions(+), 6 deletions(-) + hw/acpi/ich9.c | 16 ++++ + hw/acpi/piix4.c | 5 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 170 +++++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 21 ++++++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 ++ + hw/smbios/smbios.c | 46 +++++++++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 24 ++++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 ++ + 23 files changed, 355 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ab203ad448..7ec26884e8 100644 +index ae86900..9a8a627 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) +@@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } @@ -78,7 +87,7 @@ 
index ab203ad448..7ec26884e8 100644 void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -393,6 +405,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_cpu_hotplug_legacy, ich9_pm_set_cpu_hotplug_legacy, NULL); @@ -86,14 +95,14 @@ index ab203ad448..7ec26884e8 100644 + ich9_pm_get_force_rev1_fadt, + ich9_pm_set_force_rev1_fadt, + NULL); - object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", - ich9_pm_get_disable_s3, - ich9_pm_set_disable_s3, + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE, + NULL); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 93aec2dd2c..3a26193cbe 100644 +index 964d6f5..b8458ba 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -275,6 +275,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -101,7 +110,7 @@ index 93aec2dd2c..3a26193cbe 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -628,8 +629,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -112,19 +121,32 @@ index 93aec2dd2c..3a26193cbe 100644 DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 171e690..25e6839 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1436,7 +1436,7 @@ static void 
virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index b4aa250950..0012f0e44d 100644 +index c822a9a..0c7f1ff 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c -@@ -34,6 +34,7 @@ - #include "sysemu/runstate.h" +@@ -35,6 +35,7 @@ #include "qemu/error-report.h" #include "trace.h" + #include "hw/qdev-properties.h" +#include "migration/migration.h" //#define DEBUG_SERIAL -@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -704,6 +705,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; @@ -134,7 +156,7 @@ index b4aa250950..0012f0e44d 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -785,6 +789,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -145,7 +167,7 @@ index b4aa250950..0012f0e44d 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -802,6 +810,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -156,7 +178,7 @@ index b4aa250950..0012f0e44d 100644 return s->timeout_ipending != 0; } -@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -819,6 +831,10 @@ static const VMStateDescription 
vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -168,12 +190,12 @@ index b4aa250950..0012f0e44d 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1689ad3bf8..e0e0eec8bf 100644 +index c1a444c..af407cc 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -27,6 +27,176 @@ - #include "hw/pci/pci.h" +@@ -28,6 +28,176 @@ #include "hw/mem/nvdimm.h" + #include "migration/vmstate.h" +/* + * The same as hw_compat_4_1 @@ -220,7 +242,7 @@ index 1689ad3bf8..e0e0eec8bf 100644 + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-pci", "edid", "false" }, ++ { "virtio-gpu-device", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ @@ -345,14 +367,14 @@ index 1689ad3bf8..e0e0eec8bf 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_4_1[] = { - { "virtio-pci", "x-pcie-flr-init", "off" }, - }; + GlobalProperty hw_compat_4_2[] = { + { "virtio-blk-device", "queue-size", "128"}, + { "virtio-scsi-device", "virtqueue_size", "128"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 873e5e9706..d1a2efe47e 100644 +index 0633ed3..b703e9e 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -84,7 +84,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -361,8 +383,34 @@ index 873e5e9706..d1a2efe47e 100644 DEFINE_PROP_END_OF_LIST(), }; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b75087d..b255d56 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + 
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index d2806c1..461e1cd 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b69fd7d8ad..d8be50a1ce 100644 +index 79ba158..311dbe0 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -436,10 +484,10 @@ index b69fd7d8ad..d8be50a1ce 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 88a97d756d..21d80e96cf 100644 +index 70aca7e..0950cee 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -448,7 +496,7 @@ index 88a97d756d..21d80e96cf 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -459,7 +507,7 @@ index 88a97d756d..21d80e96cf 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 
74ae74bc5c..73820517df 100644 +index d18c099..8a3bd68 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,6 +42,7 @@ @@ -483,19 +531,89 @@ index 74ae74bc5c..73820517df 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 11d476c4a2..e6e9355384 100644 +index ffd9872..7818b90 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -56,6 +56,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -531,7 +534,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -752,7 +755,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -773,11 +779,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. 
++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type1.product, product); SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index 050875b..32935da 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -508,7 +626,7 @@ index 050875b497..32935da46c 100644 
vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 23507ad3b5..9fd87a7ad9 100644 +index 37f7beb..2741edc 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -528,10 +646,10 @@ index 23507ad3b5..9fd87a7ad9 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 80988bb305..8fed2eedd6 100644 +index b330e36..b25cce8 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3600,9 +3600,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -559,7 +677,7 @@ index 80988bb305..8fed2eedd6 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3611,6 +3629,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -569,7 +687,7 @@ index 80988bb305..8fed2eedd6 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df2a7..f554b671e3 100644 +index 2fad4df..f554b67 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -582,7 +700,7 @@ index 2fad4df2a7..f554b671e3 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1837..1a23ccc412 100644 +index 41568d1..1a23ccc 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -596,10 +714,10 @@ index 41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index de45087f34..6f85a0e032 100644 +index 
fd4d62b..0046ab5 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; +@@ -369,4 +369,28 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -628,8 +746,38 @@ index de45087f34..6f85a0e032 100644 +extern const size_t hw_compat_rhel_7_1_len; + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 02a0ced..67e38a1 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 05e1945..811c3d5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -100,6 +100,9 @@ typedef struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968a19..b353438ea0 100644 +index c24d968..b353438 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -642,10 +790,10 @@ index c24d968a19..b353438ea0 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 
354ad072fa..30c53c623b 100644 +index 177cce9..2864560 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -121,6 +121,8 @@ enum mig_rp_message_type { +@@ -128,6 +128,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -655,10 +803,10 @@ index 354ad072fa..30c53c623b 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 79b3dda146..0b1b0d4df5 100644 +index 507284e..0baa337 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -339,6 +339,11 @@ void init_dirty_bitmap_incoming_migration(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -671,5 +819,5 @@ index 79b3dda146..0b1b0d4df5 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.21.0 +1.8.3.1 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 5397c8b..320b19f 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 +From a373b0198f9268478a5211efb6a545d9c598b364 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,14 +7,14 @@ Adding changes to add RHEL machine types for aarch64 architecture. 
Signed-off-by: Miroslav Rezanina -Rebase changes (4.0.0): +Rebase notes (4.0.0): - Use upstream compat handling -Rebase changes (4.1.0-rc0): +Rebase notes (4.1.0-rc0): - Removed a15memmap (upstream) - Use virt_flash_create in rhel800_virt_instance_init -Rebase changes (4.2.0-rc0): +Rebase notes (4.2.0-rc0): - Set numa_mem_supported Rebase notes (4.2.0-rc3): @@ -23,6 +23,12 @@ Rebase notes (4.2.0-rc3): - aarch64: virt: Allow PCDIMM instantiation (patch 92247) - aarch64: virt: Enhance the comment related to gic-version (patch 92248) +Rebase notes (weekly-200226): +- Set default_ram_id in rhel_machine_class_init + +Rebase notes (5.0.0-rc1): +- Added setting acpi properties + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -31,26 +37,24 @@ Merged patches (4.0.0): Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine - -Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 +++ - 2 files changed, 171 insertions(+), 1 deletion(-) + hw/arm/virt.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 ++++ + 2 files changed, 179 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d4bedc2607..e10839100e 100644 +index 25e6839..1387ff6 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -72,6 +72,7 @@ - #include "hw/mem/nvdimm.h" - #include "hw/acpi/generic_event_device.h" +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -98,7 +99,49 @@ +@@ -105,7 +106,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -101,7 +105,7 @@ index d4bedc2607..e10839100e 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) +@@ -1914,6 +1957,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -109,7 +113,7 @@ index d4bedc2607..e10839100e 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1942,6 +1986,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -117,7 +121,7 @@ index d4bedc2607..e10839100e 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2218,6 +2263,7 @@ static int 
virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? requested_pa_size : 0; } @@ -125,7 +129,7 @@ index d4bedc2607..e10839100e 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2478,3 +2524,124 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -159,7 +163,15 @@ index d4bedc2607..e10839100e 100644 + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; ++ mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL, &error_abort); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI", &error_abort); +} + +static const TypeInfo rhel_machine_info = { @@ -243,10 +255,10 @@ index d4bedc2607..e10839100e 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0b41083e9d..53fdf16563 100644 +index 6d67ace..e44e25c 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -142,6 +142,7 @@ typedef struct { +@@ -156,6 +156,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -254,7 +266,7 @@ index 0b41083e9d..53fdf16563 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -150,6 +151,16 @@ typedef struct { +@@ -164,6 +165,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) @@ -269,8 +281,8 @@ index 0b41083e9d..53fdf16563 100644 +#endif + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); - /* Return the number of used redistributor regions */ -- -2.21.0 +1.8.3.1 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index a3f1a54..b14fcee 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 +From c50a71e2a577b532a904e70d23f7533aca0b3a6f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -31,36 +31,37 @@ Merged patches (4.2.0): - redhat: update pseries-rhel-7.6.0 machine type (patch 93039) - redhat: define pseries-rhel8.2.0 machine type (patch 93041) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200226): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) --- - hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 ++ + hw/ppc/spapr.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + - target/ppc/compat.c | 13 +- + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - 5 files changed, 305 insertions(+), 1 deletion(-) + 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index e076f6023c..8749c72066 100644 +index 9a2bd50..20b3437 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->linux_pci_probe = true; +@@ -4549,6 +4549,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; + xfc->match_nvt = spapr_match_nvt; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4599,6 +4600,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-4.2 + * pseries-5.0 */ -@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4645,6 +4647,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -68,7 +69,7 @@ index e076f6023c..8749c72066 100644 /* * pseries-4.0 -@@ -4536,6 +4539,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4661,6 +4664,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -76,7 +77,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_4_0_class_options(MachineClass 
*mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4820,6 +4824,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -84,7 +85,7 @@ index e076f6023c..8749c72066 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4874,6 +4879,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -92,7 +93,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4988,6 +4994,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -131,6 +132,8 @@ index e076f6023c..8749c72066 100644 + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; +} + +DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); @@ -372,7 +375,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 301cd7b4e4..ba5a8fb82b 100644 +index 542c19e..916ab0e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -383,7 +386,7 @@ index 301cd7b4e4..ba5a8fb82b 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -238,6 +239,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); Error 
*local_err = NULL; @@ -391,7 +394,7 @@ index 301cd7b4e4..ba5a8fb82b 100644 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -254,6 +256,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -250,6 +252,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -410,22 +413,22 @@ index 301cd7b4e4..ba5a8fb82b 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index d5ab5ea7b2..aa89cc4a95 100644 +index 42d64a0..c03611f 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -125,6 +125,7 @@ struct SpaprMachineClass { - bool linux_pci_probe; +@@ -128,6 +128,7 @@ struct SpaprMachineClass { bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + hwaddr rma_limit; /* clamp the RMA to this size */ + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3122..3e2e35342d 100644 +index f48df25..34e3d0f 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c -@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) return NULL; } @@ -447,10 +450,10 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e3e82327b7..5c53801cfd 100644 +index 88d9449..0d7f5f5 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1323,6 +1323,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -459,5 +462,5 
@@ index e3e82327b7..5c53801cfd 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -2.21.0 +1.8.3.1 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index d0f6669..bed50e9 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 +From 349c332a69933b977b40f4a2198236611d002818 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -20,17 +20,15 @@ Merged patches (4.2.0): - fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) - -Signed-off-by: Danilo C. L. de Paula --- - hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 69 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d3edeef0ad..c2c83d2fce 100644 +index 45292fb..1b3a04c 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -615,7 +615,7 @@ bool css_migration_enabled(void) +@@ -777,7 +777,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -38,16 +36,16 @@ index d3edeef0ad..c2c83d2fce 100644 + mc->desc = "VirtIO-ccw based S390 machine " verstr; \ if (latest) { \ mc->alias = "s390-ccw-virtio"; \ - mc->is_default = 1; \ -@@ -639,6 +639,7 @@ bool css_migration_enabled(void) + mc->is_default = true; \ +@@ -801,6 +801,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* 
Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_4_2_instance_options(MachineState *machine) + static void ccw_machine_5_0_instance_options(MachineState *machine) { } -@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1041,6 +1042,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -59,6 +57,7 @@ index d3edeef0ad..c2c83d2fce 100644 + +static void ccw_machine_rhel820_class_options(MachineClass *mc) +{ ++ mc->fixup_ram_size = s390_fixup_ram_size; +} +DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); + @@ -122,5 +121,5 @@ index d3edeef0ad..c2c83d2fce 100644 static void ccw_machine_register_types(void) { -- -2.21.0 +1.8.3.1 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 72a5159..fba69c6 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 +From 9da7d3c4b5a90c155ea4227c412b0ebd4d2a9b87 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -37,23 +37,27 @@ Merged patches (4.2.0): - 0784125 x86 machine types: add pc-q35-rhel8.1.0 - machines/x86: Add rhel 8.2 machine type (patch 92959) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200122): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support + +Merged patches (weekly-200318): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- + hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 162 ++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 33 ++++++ - target/i386/cpu.c | 9 +- + include/hw/i386/pc.h | 33 +++++++ + target/i386/cpu.c | 3 +- target/i386/kvm.c | 4 + - 8 files changed, 673 insertions(+), 7 deletions(-) + 8 files changed, 675 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 12ff55fcfb..64001893ab 100644 +index 2e15f68..8dbf49b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -213,6 +213,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -64,10 +68,10 @@ index 12ff55fcfb..64001893ab 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ac08e63604..61e70e4811 100644 +index f6b8431..2ed002f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { +@@ -341,6 +341,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -78,6 +82,8 @@ index ac08e63604..61e70e4811 100644 +GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, 
"vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, +}; @@ -326,10 +332,10 @@ index ac08e63604..61e70e4811 100644 +}; +const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + - void gsi_handler(void *opaque, int n, int level) + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { - GSIState *s = opaque; -@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, + GSIState *s; +@@ -1031,7 +1288,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -339,7 +345,7 @@ index ac08e63604..61e70e4811 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1937,6 +2195,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -348,7 +354,7 @@ index ac08e63604..61e70e4811 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1948,7 +2208,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -359,7 +365,7 @@ index ac08e63604..61e70e4811 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1bd70d1abb..bd7fdb99bb 100644 +index b255d56..8eb64d1 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ @@ -370,7 +376,7 @@ index 1bd70d1abb..bd7fdb99bb 
100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -379,19 +385,19 @@ index 1bd70d1abb..bd7fdb99bb 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -307,6 +308,7 @@ else { + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -312,6 +313,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void pc_compat_2_3_fn(MachineState *machine) { - PCMachineState *pcms = PC_MACHINE(machine); -@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) - DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, - xenfv_machine_options); + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -975,3 +977,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); #endif +#endif /* Disabled for Red Hat Enterprise Linux */ + @@ -598,10 +604,10 @@ index 1bd70d1abb..bd7fdb99bb 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 385e5cffb1..7531d8ed76 100644 +index 461e1cd..f5ae759 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -610,9 +616,9 @@ index 385e5cffb1..7531d8ed76 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - 
SMBIOS_ENTRY_POINT_21); - } -@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -335,6 +335,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -620,7 +626,7 @@ index 385e5cffb1..7531d8ed76 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -549,3 +550,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -655,8 +661,11 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel820_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; +} + +DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, @@ -669,9 +678,12 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel810_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); +} @@ -776,10 +788,10 @@ index 385e5cffb1..7531d8ed76 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6f85a0e032..2920bdef5b 100644 +index 0046ab5..d81225b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -222,6 +222,8 @@ struct MachineClass { +@@ -207,6 
+207,8 @@ struct MachineClass { const char **valid_cpu_types; strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; @@ -789,10 +801,10 @@ index 6f85a0e032..2920bdef5b 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1f86eba3f9..2e362c8faa 100644 +index 811c3d5..5b90f8b 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -124,6 +124,9 @@ typedef struct PCMachineClass { +@@ -118,6 +118,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -802,7 +814,7 @@ index 1f86eba3f9..2e362c8faa 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; +@@ -260,6 +263,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -840,20 +852,10 @@ index 1f86eba3f9..2e362c8faa 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1b7880ae3a..790db778ab 100644 +index 26a8584..dc4d1c9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -+ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -+ * The default on RHEL-6 was cpu64-rhel6. -+ * libvirt assumes that qemu64 is the default for _all_ machine-types, -+ * so we should try to keep qemu64 and cpu64-rhel6 as similar as -+ * possible. 
-+ */ - .name = "qemu64", +@@ -1806,7 +1806,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 6, @@ -862,7 +864,7 @@ index 1b7880ae3a..790db778ab 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { +@@ -4085,6 +4085,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -871,10 +873,10 @@ index 1b7880ae3a..790db778ab 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 1d10046a6c..86d9a1f364 100644 +index 4901c6d..e41cff2 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3113,6 +3113,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -882,7 +884,7 @@ index 1d10046a6c..86d9a1f364 100644 kvm_msr_buf_reset(cpu); -@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3422,6 +3423,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -893,5 +895,5 @@ index 1d10046a6c..86d9a1f364 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -2.21.0 +1.8.3.1 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 09f7b4e..cab4413 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 +From 0114b7010c87be70014b170ffdf66e1317f6becc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -22,185 +22,36 @@ Rebase changes (4.1.0-rc1): Rebase changes (4.2.0-rc0): - partially disable hd-geo-test (requires lsi53c895a) +Rebase changes (weekly-200129): +- Disable qtest/q35-test 
(uses upstream machine types) + +Rebased changes (weekly-200212): +- Do not run iotests on make check + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 - -Signed-off-by: Danilo C. L. de Paula --- - redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 10 +++++----- - tests/boot-serial-test.c | 6 +++++- - tests/cpu-plug-test.c | 4 ++-- - tests/e1000-test.c | 2 ++ - tests/hd-geo-test.c | 4 ++++ - tests/prom-env-test.c | 4 ++++ - tests/qemu-iotests/051 | 12 ++++++------ - tests/qemu-iotests/group | 4 ++-- - tests/test-x86-cpuid-compat.c | 2 ++ - tests/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 37 insertions(+), 17 deletions(-) + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 12 ++++++------ + tests/qemu-iotests/group | 4 ++-- + tests/qtest/Makefile.include | 12 ++++++------ + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/e1000-test.c | 2 ++ + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2 ++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 11 files changed, 38 insertions(+), 18 deletions(-) -diff --git a/tests/Makefile.include b/tests/Makefile.include -index b483790cf3..53bdbdfee0 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -230,7 +230,7 @@ 
check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) - - check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) --check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -+#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) - check-qtest-ppc-y += tests/prom-env-test$(EXESUF) - check-qtest-ppc-y += tests/drive_del-test$(EXESUF) - check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) -@@ -244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) --check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) - check-qtest-s390x-y += tests/drive_del-test$(EXESUF) --check-qtest-s390x-y += tests/device-plug-test$(EXESUF) -+#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) - check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) - check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) - check-qtest-s390x-y += tests/migration-test$(EXESUF) -diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index d3a54a0ba5..33ce72b89c 100644 ---- a/tests/boot-serial-test.c -+++ 
b/tests/boot-serial-test.c -@@ -108,19 +108,23 @@ static testdef_t tests[] = { - { "ppc", "g3beige", "", "PowerPC,750" }, - { "ppc", "mac99", "", "PowerPC,G4" }, - { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "ppce500", "", "U-Boot" }, - { "ppc64", "40p", "-m 192", "Memory: 192M" }, - { "ppc64", "mac99", "", "PowerPC,970FX" }, -+#endif - { "ppc64", "pseries", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", - "Open Firmware" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv8", "", "OPAL" }, - { "ppc64", "powernv9", "", "OPAL" }, - { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, -+#endif - { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "i386", "pc", "-device sga", "SGABIOS" }, - { "i386", "q35", "-device sga", "SGABIOS" }, -- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, -+ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "x86_64", "q35", "-device sga", "SGABIOS" }, - { "sparc", "LX", "", "TMS390S10" }, - { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 30e514bbfb..a04beae1c6 100644 ---- a/tests/cpu-plug-test.c -+++ b/tests/cpu-plug-test.c -@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) - char *path; - PlugTestData *data; - -- if (!g_str_has_prefix(mname, "pseries-") || -- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ if (!g_str_has_prefix(mname, "pseries-rhel") || -+ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { - return; - } - data = g_new(PlugTestData, 1); -diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984ef6..c89112d6f8 100644 ---- a/tests/e1000-test.c -+++ b/tests/e1000-test.c -@@ -22,9 +22,11 @@ struct QE1000 { - - static const char *models[] = { - "e1000", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82540em", - "e1000-82544gc", - 
"e1000-82545em", -+#endif - }; - - static void *e1000_get_driver(void *obj, const char *interface) -diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c -index 7e86c5416c..cc068bad87 100644 ---- a/tests/hd-geo-test.c -+++ b/tests/hd-geo-test.c -@@ -732,6 +732,7 @@ static void test_override_ide(void) - test_override(args, expected); - } - -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - static void test_override_scsi(void) - { - TestArgs *args = create_args(); -@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) - add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); - test_override(args, expected); - } -+#endif - - static void test_override_virtio_blk(void) - { -@@ -951,9 +953,11 @@ int main(int argc, char **argv) - qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); - if (have_qemu_img()) { - qtest_add_func("hd-geo/override/ide", test_override_ide); -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - qtest_add_func("hd-geo/override/scsi", test_override_scsi); - qtest_add_func("hd-geo/override/scsi_2_controllers", - test_override_scsi_2_controllers); -+#endif - qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); - qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); - qtest_add_func("hd-geo/override/scsi_hot_unplug", -diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1e7b..028d45c7d7 100644 ---- a/tests/prom-env-test.c -+++ b/tests/prom-env-test.c -@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) - if (!strcmp(arch, "ppc")) { - add_tests(ppc_machines); - } else if (!strcmp(arch, "ppc64")) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - add_tests(ppc_machines); - if (g_test_slow()) { -+#endif - qtest_add_data_func("prom-env/pseries", "pseries", test_machine); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - } -+#endif - } else if (!strcmp(arch, "sparc")) { - add_tests(sparc_machines); - } else if (!strcmp(arch, "sparc64")) { diff --git 
a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 53bcdbc911..b387e0c233 100755 +index 034d3a3..aadc413 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 -@@ -181,11 +181,11 @@ run_qemu -drive if=virtio +@@ -183,11 +183,11 @@ run_qemu -drive if=virtio case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive if=none,id=disk -device ide-cd,drive=disk @@ -215,7 +66,7 @@ index 53bcdbc911..b387e0c233 100755 ;; *) ;; -@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on +@@ -236,11 +236,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk @@ -231,7 +82,7 @@ index 53bcdbc911..b387e0c233 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 6b10a6a762..06cc734b26 100644 +index 1710470..0711b66 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -252,10 +103,172 @@ index 6b10a6a762..06cc734b26 100644 # 100 was removed, do not reuse 101 rw quick 102 rw quick -diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 772287bdb4..e7c075ed98 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c +diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include +index 9e5a51d..0732f59 100644 +--- a/tests/qtest/Makefile.include ++++ b/tests/qtest/Makefile.include +@@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test + check-qtest-i386-$(CONFIG_TOOLS) += ahci-test + check-qtest-i386-y += hd-geo-test + check-qtest-i386-y += boot-order-test +-check-qtest-i386-y += bios-tables-test ++#check-qtest-i386-y += bios-tables-test + check-qtest-i386-$(CONFIG_SGA) += boot-serial-test + check-qtest-i386-$(CONFIG_SLIRP) += pxe-test + check-qtest-i386-y += rtc-test +@@ -51,7 +51,7 @@ check-qtest-i386-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test + check-qtest-i386-$(call 
land,$(CONFIG_USB_EHCI),$(CONFIG_USB_UHCI)) += usb-hcd-ehci-test + check-qtest-i386-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test + check-qtest-i386-y += cpu-plug-test +-check-qtest-i386-y += q35-test ++#check-qtest-i386-y += q35-test + check-qtest-i386-y += vmgenid-test + check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-swtpm-test + check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-test +@@ -88,7 +88,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += display-vga-test + check-qtest-moxie-y += boot-serial-test + + check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = endianness-test +-check-qtest-ppc-y += boot-order-test ++#check-qtest-ppc-y += boot-order-test + check-qtest-ppc-y += prom-env-test + check-qtest-ppc-y += drive_del-test + check-qtest-ppc-y += boot-serial-test +@@ -102,8 +102,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += rtas-test + check-qtest-ppc64-$(CONFIG_SLIRP) += pxe-test + check-qtest-ppc64-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test + check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test +-check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter +-check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror ++#check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter ++#check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror + check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += test-filter-redirector + check-qtest-ppc64-$(CONFIG_VGA) += display-vga-test + check-qtest-ppc64-y += numa-test +@@ -152,7 +152,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += test-netfilter + check-qtest-s390x-$(CONFIG_POSIX) += test-filter-mirror + check-qtest-s390x-$(CONFIG_POSIX) += test-filter-redirector + check-qtest-s390x-y += drive_del-test +-check-qtest-s390x-y += device-plug-test ++#check-qtest-s390x-y += device-plug-test + check-qtest-s390x-y += virtio-ccw-test + check-qtest-s390x-y += cpu-plug-test + check-qtest-s390x-y += migration-test +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 85a3614..1c18441 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c 
+@@ -109,19 +109,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine " PSERIES_DEFAULT_CAPABILITIES, + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index e8ffbbc..fda7269 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -181,8 +181,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c +index c387984..c89112d 100644 +--- a/tests/qtest/e1000-test.c ++++ b/tests/qtest/e1000-test.c +@@ -22,9 +22,11 @@ struct QE1000 { + + static const char *models[] = { + "e1000", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82540em", + "e1000-82544gc", + "e1000-82545em", 
++#endif + }; + + static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 48e8e02..6496196 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -960,9 +962,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index 60e6ec3..f9d6adc 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { 
+diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index 772287b..e7c075e 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) "-cpu 486,xlevel2=0xC0000002,+xstore", "xlevel2", 0xC0000002); @@ -272,10 +285,10 @@ index 772287bdb4..e7c075ed98 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/usb-hcd-xhci-test.c -+++ b/tests/usb-hcd-xhci-test.c +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2..3855873 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) usb_test_hotplug(global_qtest, "xhci", "1", NULL); } @@ -303,5 +316,5 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.21.0 +1.8.3.1 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index db776c4..77ec099 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 +From e77808a25ee638b717e1507a1e55cbf8350afbfd Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -35,14 +35,13 @@ Merged patches (2.9.0): (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) -Signed-off-by: Danilo C. L. 
de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c8534d3035..309535f306 100644 +index e265d77..41d00a3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -87,7 +86,7 @@ index c8534d3035..309535f306 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3169,6 +3193,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,7 +97,7 @@ index c8534d3035..309535f306 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 35626cd63e..0cd4803aee 100644 +index 0da7a20..5d2b0d2 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -110,5 +109,5 @@ index 35626cd63e..0cd4803aee 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.21.0 +1.8.3.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index cb77bfe..d87713a 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 +From 8b189d52c8b8e2c251d76c7b00dc4a2a0a570bf8 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -18,16 +18,15 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) -Signed-off-by: Danilo C. L. de Paula --- - vl.c | 9 +++++++++ + softmmu/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/vl.c b/vl.c -index 668a34577e..9f3e7e7733 100644 ---- a/vl.c -+++ b/vl.c -@@ -1822,9 +1822,17 @@ static void version(void) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 00f7604..5ba8c19 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -1674,9 +1674,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +44,7 @@ index 668a34577e..9f3e7e7733 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1841,6 +1849,7 @@ static void help(int exitcode) +@@ -1693,6 +1701,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +53,5 @@ index 668a34577e..9f3e7e7733 100644 } -- -2.21.0 +1.8.3.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index cec862d..89e14e7 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 +From e0aee69fcafe1c3656db2676b8a0d379a48c299c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -74,18 +74,16 @@ The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. This commit matches the limit to current KVM_CAP_NR_VCPUS value. - -Signed-off-by: Danilo C. L. 
de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ - vl.c | 18 ++++++++++++++++++ + softmmu/vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ca00daa2f5..dc3ed7f04e 100644 +index 439a4ef..6f804b8 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) +@@ -1975,6 +1975,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -104,11 +102,11 @@ index ca00daa2f5..dc3ed7f04e 100644 while (nc->name) { if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -diff --git a/vl.c b/vl.c -index 9f3e7e7733..1550aa2aaa 100644 ---- a/vl.c -+++ b/vl.c -@@ -134,6 +134,8 @@ int main(int argc, char **argv) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 5ba8c19..e98ab6b 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -115,6 +115,8 @@ #define MAX_VIRTIO_CONSOLES 1 @@ -117,8 +115,8 @@ index 9f3e7e7733..1550aa2aaa 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) - return NULL; +@@ -1177,6 +1179,20 @@ static MachineClass *find_default_machine(GSList *machines) + return default_machineclass; } +/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ @@ -138,7 +136,7 @@ index 9f3e7e7733..1550aa2aaa 100644 static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; -@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) +@@ -3829,6 +3845,8 @@ void qemu_init(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } @@ -148,5 +146,5 @@ index 9f3e7e7733..1550aa2aaa 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -2.21.0 +1.8.3.1 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index 
9624855..d64625b 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 +From 565cee8e4965ece9e0c271cad813263b606b3e65 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -16,30 +16,31 @@ Rebase notes (2.9.0): Rebase notes (2.8.0): - Changed tracetool.py parameters +Rebase notes (weekly-200219): +- Removed python shenigan (done upstream) + Merged patches (2.3.0): - db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp - 5292fc3 trace: add SystemTap init scripts for simpletrace bridge - eda9e5e simpletrace: install simpletrace.py - 85c4c8f trace: add systemtap-initscript README file to RPM - -Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 25 ++++++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + - 6 files changed, 79 insertions(+), 1 deletion(-) + 6 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 README.systemtap create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 086727dbb9..4254950f7f 100644 +index aee2e8e..ded56e5 100644 --- a/Makefile +++ b/Makefile -@@ -939,6 +939,10 @@ endif +@@ -999,6 +999,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" @@ -52,7 +53,7 @@ index 086727dbb9..4254950f7f 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000000..ad913fc990 
+index 0000000..ad913fc --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -101,7 +102,7 @@ index 0000000000..ad913fc990 + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000000..372d8160a4 +index 0000000..372d816 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -111,11 +112,11 @@ index 0000000000..372d8160a4 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000000..c04abf9449 +index 0000000..c04abf9 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -2.21.0 +1.8.3.1 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index ef83445..7065d7d 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,6 +1,6 @@ -From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 +From 4375e8b568866c7ddbde19de1bb999cf3ebfe6fe Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 30 Nov 2018 09:11:03 +0100 +Date: Wed, 29 Jan 2020 09:30:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- Patchwork-id: 62380 @@ -16,103 +16,3702 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/qemu-block-drivers.texi | 2 +- - docs/qemu-cpu-models.texi | 2 +- - qemu-doc.texi | 6 +++--- - qemu-options.hx | 16 ++++++++-------- - 4 files changed, 13 insertions(+), 13 deletions(-) -diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 2c7ea49c32..5d0afb3dee 100644 ---- a/docs/qemu-block-drivers.texi -+++ b/docs/qemu-block-drivers.texi -@@ -2,7 +2,7 @@ - QEMU block driver reference manual - @c man end - --@set qemu_system qemu-system-x86_64 -+@set qemu_system qemu-kvm - - @c man begin DESCRIPTION - +--- + +Rebase notes (weekly-200129): +- qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Conflicts: + docs/qemu-cpu-models.texi + docs/system/qemu-block-drivers.rst + qemu-doc.texi + qemu-options.hx +--- + docs/qemu-cpu-models.texi | 677 +++++++++++ + qemu-doc.texi | 2967 +++++++++++++++++++++++++++++++++++++++++++++ + qemu-options.hx | 10 +- + 3 files changed, 3649 insertions(+), 5 deletions(-) + create mode 100644 docs/qemu-cpu-models.texi + create mode 100644 qemu-doc.texi + diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index f88a1def0d..c82cf8fab7 100644 ---- a/docs/qemu-cpu-models.texi +new file mode 100644 +index 0000000..c82cf8f +--- /dev/null +++ b/docs/qemu-cpu-models.texi -@@ -2,7 +2,7 @@ - QEMU / KVM CPU model configuration - @c man end - --@set qemu_system_x86 qemu-system-x86_64 +@@ -0,0 +1,677 @@ ++@c man begin SYNOPSIS ++QEMU / KVM CPU model configuration ++@c man end ++ +@set qemu_system_x86 qemu-kvm - - @c man begin DESCRIPTION - ++ ++@c man begin DESCRIPTION ++ ++@menu ++* recommendations_cpu_models_x86:: Recommendations for KVM CPU model configuration on x86 hosts ++* recommendations_cpu_models_MIPS:: Supported CPU model configurations on MIPS hosts ++* cpu_model_syntax_apps:: Syntax for configuring CPU models ++@end menu ++ ++QEMU / KVM virtualization supports two ways to configure CPU models ++ ++@table @option ++ ++@item Host passthrough ++ ++This 
passes the host CPU model features, model, stepping, exactly to the ++guest. Note that KVM may filter out some host CPU model features if they ++cannot be supported with virtualization. Live migration is unsafe when ++this mode is used as libvirt / QEMU cannot guarantee a stable CPU is ++exposed to the guest across hosts. This is the recommended CPU to use, ++provided live migration is not required. ++ ++@item Named model ++ ++QEMU comes with a number of predefined named CPU models, that typically ++refer to specific generations of hardware released by Intel and AMD. ++These allow the guest VMs to have a degree of isolation from the host CPU, ++allowing greater flexibility in live migrating between hosts with differing ++hardware. ++@end table ++ ++In both cases, it is possible to optionally add or remove individual CPU ++features, to alter what is presented to the guest by default. ++ ++Libvirt supports a third way to configure CPU models known as "Host model". ++This uses the QEMU "Named model" feature, automatically picking a CPU model ++that is similar to the host CPU, and then adding extra features to approximate ++the host model as closely as possible. This does not guarantee the CPU family, ++stepping, etc will precisely match the host CPU, as they would with "Host ++passthrough", but gives much of the benefit of passthrough, while making ++live migration safe. ++ ++@node recommendations_cpu_models_x86 ++@subsection Recommendations for KVM CPU model configuration on x86 hosts ++ ++The information that follows provides recommendations for configuring ++CPU models on x86 hosts. The goals are to maximise performance, while ++protecting guest OS against various CPU hardware flaws, and optionally ++enabling live migration between hosts with heterogeneous CPU models.
++ ++@menu ++* preferred_cpu_models_intel_x86:: Preferred CPU models for Intel x86 hosts ++* important_cpu_features_intel_x86:: Important CPU features for Intel x86 hosts ++* preferred_cpu_models_amd_x86:: Preferred CPU models for AMD x86 hosts ++* important_cpu_features_amd_x86:: Important CPU features for AMD x86 hosts ++* default_cpu_models_x86:: Default x86 CPU models ++* other_non_recommended_cpu_models_x86:: Other non-recommended x86 CPUs ++@end menu ++ ++@node preferred_cpu_models_intel_x86 ++@subsubsection Preferred CPU models for Intel x86 hosts ++ ++The following CPU models are preferred for use on Intel hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{Skylake-Server} ++@item @code{Skylake-Server-IBRS} ++ ++Intel Xeon Processor (Skylake, 2016) ++ ++ ++@item @code{Skylake-Client} ++@item @code{Skylake-Client-IBRS} ++ ++Intel Core Processor (Skylake, 2015) ++ ++ ++@item @code{Broadwell} ++@item @code{Broadwell-IBRS} ++@item @code{Broadwell-noTSX} ++@item @code{Broadwell-noTSX-IBRS} ++ ++Intel Core Processor (Broadwell, 2014) ++ ++ ++@item @code{Haswell} ++@item @code{Haswell-IBRS} ++@item @code{Haswell-noTSX} ++@item @code{Haswell-noTSX-IBRS} ++ ++Intel Core Processor (Haswell, 2013) ++ ++ ++@item @code{IvyBridge} ++@item @code{IvyBridge-IBRS} ++ ++Intel Xeon E3-12xx v2 (Ivy Bridge, 2012) ++ ++ ++@item @code{SandyBridge} ++@item @code{SandyBridge-IBRS} ++ ++Intel Xeon E312xx (Sandy Bridge, 2011) ++ ++ ++@item @code{Westmere} ++@item @code{Westmere-IBRS} ++ ++Westmere E56xx/L56xx/X56xx (Nehalem-C, 2010) ++ ++ ++@item @code{Nehalem} ++@item @code{Nehalem-IBRS} ++ ++Intel Core i7 9xx (Nehalem Class Core i7, 2008) ++ ++ ++@item @code{Penryn} ++ ++Intel Core 2 Duo 
P9xxx (Penryn Class Core 2, 2007) ++ ++ ++@item @code{Conroe} ++ ++Intel Celeron_4x0 (Conroe/Merom Class Core 2, 2006) ++ ++@end table ++ ++@node important_cpu_features_intel_x86 ++@subsubsection Important CPU features for Intel x86 hosts ++ ++The following are important CPU features that should be used on Intel x86 ++hosts, when available in the host CPU. Some of them require explicit ++configuration to enable, as they are not included by default in some, or all, ++of the named CPU models listed above. In general all of these features are ++included if using "Host passthrough" or "Host model". ++ ++ ++@table @option ++ ++@item @code{pcid} ++ ++Recommended to mitigate the cost of the Meltdown (CVE-2017-5754) fix ++ ++Included by default in Haswell, Broadwell & Skylake Intel CPU models. ++ ++Should be explicitly turned on for Westmere, SandyBridge, and IvyBridge ++Intel CPU models. Note that some desktop/mobile Westmere CPUs cannot ++support this feature. ++ ++ ++@item @code{spec-ctrl} ++ ++Required to enable the Spectre v2 (CVE-2017-5715) fix. ++ ++Included by default in Intel CPU models with -IBRS suffix. ++ ++Must be explicitly turned on for Intel CPU models without -IBRS suffix. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{stibp} ++ ++Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some ++operating systems. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any Intel CPU model. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. 
++ ++ ++@item @code{pdpe1gb} ++ ++Recommended to allow guest OS to use 1GB size pages ++ ++Not included by default in any Intel CPU model. ++ ++Should be explicitly turned on for all Intel CPU models. ++ ++Note that not all CPU hardware will support this feature. ++ ++@item @code{md-clear} ++ ++Required to confirm the MDS (CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, ++CVE-2019-11091) fixes. ++ ++Not included by default in any Intel CPU model. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++@end table ++ ++ ++@node preferred_cpu_models_amd_x86 ++@subsubsection Preferred CPU models for AMD x86 hosts ++ ++The following CPU models are preferred for use on AMD hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++ ++@item @code{EPYC} ++@item @code{EPYC-IBPB} ++ ++AMD EPYC Processor (2017) ++ ++ ++@item @code{Opteron_G5} ++ ++AMD Opteron 63xx class CPU (2012) ++ ++ ++@item @code{Opteron_G4} ++ ++AMD Opteron 62xx class CPU (2011) ++ ++ ++@item @code{Opteron_G3} ++ ++AMD Opteron 23xx (Gen 3 Class Opteron, 2009) ++ ++ ++@item @code{Opteron_G2} ++ ++AMD Opteron 22xx (Gen 2 Class Opteron, 2006) ++ ++ ++@item @code{Opteron_G1} ++ ++AMD Opteron 240 (Gen 1 Class Opteron, 2004) ++@end table ++ ++@node important_cpu_features_amd_x86 ++@subsubsection Important CPU features for AMD x86 hosts ++ ++The following are important CPU features that should be used on AMD x86 ++hosts, when available in the host CPU. Some of them require explicit ++configuration to enable, as they are not included by default in some, or all, ++of the named CPU models listed above.
In general all of these features are ++included if using "Host passthrough" or "Host model". ++ ++ ++@table @option ++ ++@item @code{ibpb} ++ ++Required to enable the Spectre v2 (CVE-2017-5715) fix. ++ ++Included by default in AMD CPU models with -IBPB suffix. ++ ++Must be explicitly turned on for AMD CPU models without -IBPB suffix. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{stibp} ++ ++Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some ++operating systems. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{virt-ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any AMD CPU model. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++This should be provided to guests, even if amd-ssbd is also ++provided, for maximum guest compatibility. ++ ++Note for some QEMU / libvirt versions, this must be force enabled ++when using "Host model", because this is a virtual feature ++that doesn't exist in the physical host CPUs. ++ ++ ++@item @code{amd-ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any AMD CPU model. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++This provides higher performance than virt-ssbd so should be ++exposed to guests whenever available in the host. virt-ssbd ++should none the less also be exposed for maximum guest ++compatibility as some kernels only know about virt-ssbd. ++ ++ ++@item @code{amd-no-ssb} ++ ++Recommended to indicate the host is not vulnerable to CVE-2018-3639 ++ ++Not included by default in any AMD CPU model. ++ ++Future hardware generations of CPU will not be vulnerable to ++CVE-2018-3639, and thus the guest should be told not to enable ++its mitigations, by exposing amd-no-ssb.
This is mutually ++exclusive with virt-ssbd and amd-ssbd. ++ ++ ++@item @code{pdpe1gb} ++ ++Recommended to allow guest OS to use 1GB size pages ++ ++Not included by default in any AMD CPU model. ++ ++Should be explicitly turned on for all AMD CPU models. ++ ++Note that not all CPU hardware will support this feature. ++@end table ++ ++ ++@node default_cpu_models_x86 ++@subsubsection Default x86 CPU models ++ ++The default QEMU CPU models are designed such that they can run on all hosts. ++If an application does not wish to perform any host compatibility checks ++before launching guests, the default is guaranteed to work. ++ ++The default CPU models will, however, leave the guest OS vulnerable to various ++CPU hardware flaws, so their use is strongly discouraged. Applications should ++follow the earlier guidance to setup a better CPU configuration, with host ++passthrough recommended if live migration is not needed. ++ ++@table @option ++@item @code{qemu32} ++@item @code{qemu64} ++ ++QEMU Virtual CPU version 2.5+ (32 & 64 bit variants) ++ ++qemu64 is used for x86_64 guests and qemu32 is used for i686 guests, when no ++-cpu argument is given to QEMU, or no <cpu> is provided in libvirt XML. ++@end table ++ ++ ++@node other_non_recommended_cpu_models_x86 ++@subsubsection Other non-recommended x86 CPUs ++ ++The following CPU models are compatible with most AMD and Intel x86 hosts, but ++their usage is discouraged, as they expose a very limited featureset, which ++prevents guests having optimal performance. ++ ++@table @option ++ ++@item @code{kvm32} ++@item @code{kvm64} ++ ++Common KVM processor (32 & 64 bit variants) ++ ++Legacy models just for historical compatibility with ancient QEMU versions.
++ ++ ++@item @code{486} ++@item @code{athlon} ++@item @code{phenom} ++@item @code{coreduo} ++@item @code{core2duo} ++@item @code{n270} ++@item @code{pentium} ++@item @code{pentium2} ++@item @code{pentium3} ++ ++Various very old x86 CPU models, mostly predating the introduction of ++hardware assisted virtualization, that should thus not be required for ++running virtual machines. ++@end table ++ ++@node recommendations_cpu_models_MIPS ++@subsection Supported CPU model configurations on MIPS hosts ++ ++QEMU supports variety of MIPS CPU models: ++ ++@menu ++* cpu_models_MIPS32:: Supported CPU models for MIPS32 hosts ++* cpu_models_MIPS64:: Supported CPU models for MIPS64 hosts ++* cpu_models_nanoMIPS:: Supported CPU models for nanoMIPS hosts ++* preferred_cpu_models_MIPS:: Preferred CPU models for MIPS hosts ++@end menu ++ ++@node cpu_models_MIPS32 ++@subsubsection Supported CPU models for MIPS32 hosts ++ ++The following CPU models are supported for use on MIPS32 hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. 
++ ++@table @option ++@item @code{mips32r6-generic} ++ ++MIPS32 Processor (Release 6, 2015) ++ ++ ++@item @code{P5600} ++ ++MIPS32 Processor (P5600, 2014) ++ ++ ++@item @code{M14K} ++@item @code{M14Kc} ++ ++MIPS32 Processor (M14K, 2009) ++ ++ ++@item @code{74Kf} ++ ++MIPS32 Processor (74K, 2007) ++ ++ ++@item @code{34Kf} ++ ++MIPS32 Processor (34K, 2006) ++ ++ ++@item @code{24Kc} ++@item @code{24KEc} ++@item @code{24Kf} ++ ++MIPS32 Processor (24K, 2003) ++ ++ ++@item @code{4Kc} ++@item @code{4Km} ++@item @code{4KEcR1} ++@item @code{4KEmR1} ++@item @code{4KEc} ++@item @code{4KEm} ++ ++MIPS32 Processor (4K, 1999) ++@end table ++ ++@node cpu_models_MIPS64 ++@subsubsection Supported CPU models for MIPS64 hosts ++ ++The following CPU models are supported for use on MIPS64 hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{I6400} ++ ++MIPS64 Processor (Release 6, 2014) ++ ++ ++@item @code{Loongson-2F} ++ ++MIPS64 Processor (Loongson 2, 2008) ++ ++ ++@item @code{Loongson-2E} ++ ++MIPS64 Processor (Loongson 2, 2006) ++ ++ ++@item @code{mips64dspr2} ++ ++MIPS64 Processor (Release 2, 2006) ++ ++ ++@item @code{MIPS64R2-generic} ++@item @code{5KEc} ++@item @code{5KEf} ++ ++MIPS64 Processor (Release 2, 2002) ++ ++ ++@item @code{20Kc} ++ ++MIPS64 Processor (20K, 2000) ++ ++ ++@item @code{5Kc} ++@item @code{5Kf} ++ ++MIPS64 Processor (5K, 1999) ++ ++ ++@item @code{VR5432} ++ ++MIPS64 Processor (VR, 1998) ++ ++ ++@item @code{R4000} ++ ++MIPS64 Processor (MIPS III, 1991) ++@end table ++ ++@node cpu_models_nanoMIPS ++@subsubsection Supported CPU models for nanoMIPS hosts ++ ++The following CPU models are supported for use on nanoMIPS hosts. 
Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{I7200} ++ ++MIPS I7200 (nanoMIPS, 2018) ++ ++@end table ++ ++@node preferred_cpu_models_MIPS ++@subsubsection Preferred CPU models for MIPS hosts ++ ++The following CPU models are preferred for use on different MIPS hosts: ++ ++@table @option ++@item @code{MIPS III} ++R4000 ++ ++@item @code{MIPS32R2} ++34Kf ++ ++@item @code{MIPS64R6} ++I6400 ++ ++@item @code{nanoMIPS} ++I7200 ++@end table ++ ++@node cpu_model_syntax_apps ++@subsection Syntax for configuring CPU models ++ ++The examples below illustrate the approach to configuring the various ++CPU models / features in QEMU and libvirt ++ ++@menu ++* cpu_model_syntax_qemu:: QEMU command line ++* cpu_model_syntax_libvirt:: Libvirt guest XML ++@end menu ++ ++@node cpu_model_syntax_qemu ++@subsubsection QEMU command line ++ ++@table @option ++ ++@item Host passthrough ++ ++@example ++ $ @value{qemu_system_x86} -cpu host ++@end example ++ ++With feature customization: ++ ++@example ++ $ @value{qemu_system_x86} -cpu host,-vmx,... ++@end example ++ ++@item Named CPU models ++ ++@example ++ $ @value{qemu_system_x86} -cpu Westmere ++@end example ++ ++With feature customization: ++ ++@example ++ $ @value{qemu_system_x86} -cpu Westmere,+pcid,... ++@end example ++ ++@end table ++ ++@node cpu_model_syntax_libvirt ++@subsubsection Libvirt guest XML ++ ++@table @option ++ ++@item Host passthrough ++ ++@example ++ <cpu mode='host-passthrough'/> ++@end example ++ ++With feature customization: ++ ++@example ++ <cpu mode='host-passthrough'> ++ <feature name="vmx" policy="disable"/> ++ ... ++ </cpu> ++@end example ++ ++@item Host model ++ ++@example ++ <cpu mode='host-model'/> ++@end example ++ ++With feature customization: ++ ++@example ++ <cpu mode='host-model'> ++ <feature name="vmx" policy="disable"/> ++ ...
++ </cpu> ++@end example ++ ++@item Named model ++ ++@example ++ <cpu mode='custom'> ++ <model name="Westmere"/> ++ </cpu> ++@end example ++ ++With feature customization: ++ ++@example ++ <cpu mode='custom'> ++ <model name="Westmere"/> ++ <feature name="pcid" policy="require"/> ++ ... ++ </cpu> ++@end example ++ ++@end table ++ ++@c man end ++ ++@ignore ++ ++@setfilename qemu-cpu-models ++@settitle QEMU / KVM CPU model configuration ++ ++@c man begin SEEALSO ++The HTML documentation of QEMU for more precise information and Linux ++user mode emulator invocation. ++@c man end ++ ++@c man begin AUTHOR ++Daniel P. Berrange ++@c man end ++ ++@end ignore diff --git a/qemu-doc.texi b/qemu-doc.texi -index 3ddf5c0a68..d460f8d2c0 100644 ---- a/qemu-doc.texi +new file mode 100644 +index 0000000..10cd1de +--- /dev/null +++ b/qemu-doc.texi -@@ -11,8 +11,8 @@ - @paragraphindent 0 - @c %**end of header - --@set qemu_system qemu-system-x86_64 --@set qemu_system_x86 qemu-system-x86_64 +@@ -0,0 +1,2967 @@ ++\input texinfo @c -*- texinfo -*- ++@c %**start of header ++@setfilename qemu-doc.info ++@include version.texi ++ ++@documentlanguage en ++@documentencoding UTF-8 ++ ++@settitle QEMU version @value{VERSION} User Documentation ++@exampleindent 0 ++@paragraphindent 0 ++@c %**end of header ++ +@set qemu_system qemu-kvm +@set qemu_system_x86 qemu-kvm - - @ifinfo - @direntry -@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. - Set OpenBIOS variables in NVRAM, for example: - - @example --qemu-system-ppc -prom-env 'auto-boot?=false' \ ++ ++@ifinfo ++@direntry ++* QEMU: (qemu-doc). The QEMU Emulator User Documentation.
++@end direntry ++@end ifinfo ++ ++@iftex ++@titlepage ++@sp 7 ++@center @titlefont{QEMU version @value{VERSION}} ++@sp 1 ++@center @titlefont{User Documentation} ++@sp 3 ++@end titlepage ++@end iftex ++ ++@ifnottex ++@node Top ++@top ++ ++@menu ++* Introduction:: ++* QEMU PC System emulator:: ++* QEMU System emulator for non PC targets:: ++* QEMU User space emulator:: ++* System requirements:: ++* Security:: ++* Implementation notes:: ++* Deprecated features:: ++* Recently removed features:: ++* Supported build platforms:: ++* License:: ++* Index:: ++@end menu ++@end ifnottex ++ ++@contents ++ ++@node Introduction ++@chapter Introduction ++ ++@menu ++* intro_features:: Features ++@end menu ++ ++@node intro_features ++@section Features ++ ++QEMU is a FAST! processor emulator using dynamic translation to ++achieve good emulation speed. ++ ++@cindex operating modes ++QEMU has two operating modes: ++ ++@itemize ++@cindex system emulation ++@item Full system emulation. In this mode, QEMU emulates a full system (for ++example a PC), including one or several processors and various ++peripherals. It can be used to launch different Operating Systems ++without rebooting the PC or to debug system code. ++ ++@cindex user mode emulation ++@item User mode emulation. In this mode, QEMU can launch ++processes compiled for one CPU on another CPU. It can be used to ++launch the Wine Windows API emulator (@url{https://www.winehq.org}) or ++to ease cross-compilation and cross-debugging. ++ ++@end itemize ++ ++QEMU has the following features: ++ ++@itemize ++@item QEMU can run without a host kernel driver and yet gives acceptable ++performance. It uses dynamic translation to native code for reasonable speed, ++with support for self-modifying code and precise exceptions. ++ ++@item It is portable to several operating systems (GNU/Linux, *BSD, Mac OS X, ++Windows) and architectures. ++ ++@item It performs accurate software emulation of the FPU. 
++@end itemize ++ ++QEMU user mode emulation has the following features: ++@itemize ++@item Generic Linux system call converter, including most ioctls. ++ ++@item clone() emulation using native CPU clone() to use Linux scheduler for threads. ++ ++@item Accurate signal handling by remapping host signals to target signals. ++@end itemize ++ ++QEMU full system emulation has the following features: ++@itemize ++@item ++QEMU uses a full software MMU for maximum portability. ++ ++@item ++QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators ++execute most of the guest code natively, while ++continuing to emulate the rest of the machine. ++ ++@item ++Various hardware devices can be emulated and in some cases, host ++devices (e.g. serial and parallel ports, USB, drives) can be used ++transparently by the guest Operating System. Host device passthrough ++can be used for talking to external physical peripherals (e.g. a ++webcam, modem or tape drive). ++ ++@item ++Symmetric multiprocessing (SMP) support. Currently, an in-kernel ++accelerator is required to use more than one host CPU for emulation. 
++ ++@end itemize ++ ++ ++@node QEMU PC System emulator ++@chapter QEMU PC System emulator ++@cindex system emulation (PC) ++ ++@menu ++* pcsys_introduction:: Introduction ++* pcsys_quickstart:: Quick Start ++* sec_invocation:: Invocation ++* pcsys_keys:: Keys in the graphical frontends ++* mux_keys:: Keys in the character backend multiplexer ++* pcsys_monitor:: QEMU Monitor ++* cpu_models:: CPU models ++* disk_images:: Disk Images ++* pcsys_network:: Network emulation ++* pcsys_other_devs:: Other Devices ++* direct_linux_boot:: Direct Linux Boot ++* pcsys_usb:: USB emulation ++* vnc_security:: VNC security ++* network_tls:: TLS setup for network services ++* gdb_usage:: GDB usage ++* pcsys_os_specific:: Target OS specific information ++@end menu ++ ++@node pcsys_introduction ++@section Introduction ++ ++@c man begin DESCRIPTION ++ ++The QEMU PC System emulator simulates the ++following peripherals: ++ ++@itemize @minus ++@item ++i440FX host PCI bridge and PIIX3 PCI to ISA bridge ++@item ++Cirrus CLGD 5446 PCI VGA card or dummy VGA card with Bochs VESA ++extensions (hardware level, including all non standard modes). ++@item ++PS/2 mouse and keyboard ++@item ++2 PCI IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@item ++PCI and ISA network adapters ++@item ++Serial ports ++@item ++IPMI BMC, either an internal or external one ++@item ++Creative SoundBlaster 16 sound card ++@item ++ENSONIQ AudioPCI ES1370 sound card ++@item ++Intel 82801AA AC97 Audio compatible sound card ++@item ++Intel HD Audio Controller and HDA codec ++@item ++Adlib (OPL2) - Yamaha YM3812 compatible chip ++@item ++Gravis Ultrasound GF1 sound card ++@item ++CS4231A compatible sound card ++@item ++PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub. ++@end itemize ++ ++SMP is supported with up to 255 CPUs. ++ ++QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL ++VGA BIOS. ++ ++QEMU uses YM3812 emulation by Tatsuyuki Satoh.
++ ++QEMU uses GUS emulation (GUSEMU32 @url{http://www.deinmeister.de/gusemu/}) ++by Tibor "TS" Schütz. ++ ++Note that, by default, GUS shares IRQ(7) with parallel ports and so ++QEMU must be told to not have parallel ports to have working GUS. ++ ++@example ++@value{qemu_system_x86} dos.img -soundhw gus -parallel none ++@end example ++ ++Alternatively: ++@example ++@value{qemu_system_x86} dos.img -device gus,irq=5 ++@end example ++ ++Or some other unclaimed IRQ. ++ ++CS4231A is the chip used in Windows Sound System and GUSMAX products ++ ++@c man end ++ ++@node pcsys_quickstart ++@section Quick Start ++@cindex quick start ++ ++Download and uncompress a hard disk image with Linux installed (e.g. ++@file{linux.img}) and type: ++ ++@example ++@value{qemu_system} linux.img ++@end example ++ ++Linux should boot and give you a prompt. ++ ++@node sec_invocation ++@section Invocation ++ ++@example ++@c man begin SYNOPSIS ++@command{@value{qemu_system}} [@var{options}] [@var{disk_image}] ++@c man end ++@end example ++ ++@c man begin OPTIONS ++@var{disk_image} is a raw hard disk image for IDE hard disk 0. Some ++targets do not need a disk image. ++ ++@include qemu-options.texi ++ ++@c man end ++ ++@subsection Device URL Syntax ++@c TODO merge this with section Disk Images ++ ++@c man begin NOTES ++ ++In addition to using normal file images for the emulated storage devices, ++QEMU can also use networked resources such as iSCSI devices. These are ++specified using a special URL syntax. ++ ++@table @option ++@item iSCSI ++iSCSI support allows QEMU to access iSCSI resources directly and use as ++images for the guest storage. Both disk and cdrom images are supported. ++ ++Syntax for specifying iSCSI LUNs is ++``iscsi://<target-ip>[:<port>]/<target-iqn>/<lun>'' ++ ++By default qemu will use the iSCSI initiator-name ++'iqn.2008-11.org.linux-kvm[:<name>]' but this can also be set from the command ++line or a configuration file.
++ ++Since version Qemu 2.4 it is possible to specify an iSCSI request timeout to detect ++stalled requests and force a reestablishment of the session. The timeout ++is specified in seconds. The default is 0 which means no timeout. Libiscsi ++1.15.0 or greater is required for this feature. ++ ++Example (without authentication): ++@example ++@value{qemu_system} -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ ++ -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ ++ -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++Example (CHAP username/password via URL): ++@example ++@value{qemu_system} -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++Example (CHAP username/password via environment variables): ++@example ++LIBISCSI_CHAP_USERNAME="user" \ ++LIBISCSI_CHAP_PASSWORD="password" \ ++@value{qemu_system} -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++@item NBD ++QEMU supports NBD (Network Block Devices) both using TCP protocol as well ++as Unix Domain Sockets. With TCP, the default port is 10809. ++ ++Syntax for specifying a NBD device using TCP, in preferred URI form: ++``nbd://<server-ip>[:<port>]/[<export>]'' ++ ++Syntax for specifying a NBD device using Unix Domain Sockets; remember ++that '?' is a shell glob character and may need quoting: ++``nbd+unix:///[<export>]?socket=<domain-socket>'' ++ ++Older syntax that is also recognized: ++``nbd:<server-ip>:<port>[:exportname=<export>]'' ++ ++Syntax for specifying a NBD device using Unix Domain Sockets ++``nbd:unix:<domain-socket>[:exportname=<export>]'' ++ ++Example for TCP ++@example ++@value{qemu_system} --drive file=nbd:192.0.2.1:30000 ++@end example ++ ++Example for Unix Domain Sockets ++@example ++@value{qemu_system} --drive file=nbd:unix:/tmp/nbd-socket ++@end example ++ ++@item SSH ++QEMU supports SSH (Secure Shell) access to remote disks.
++ ++Examples: ++@example ++@value{qemu_system} -drive file=ssh://user@@host/path/to/disk.img ++@value{qemu_system} -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img ++@end example ++ ++Currently authentication must be done using ssh-agent. Other ++authentication methods may be supported in future. ++ ++@item Sheepdog ++Sheepdog is a distributed storage system for QEMU. ++QEMU supports using either local sheepdog devices or remote networked ++devices. ++ ++Syntax for specifying a sheepdog device ++@example ++sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] ++@end example ++ ++Example ++@example ++@value{qemu_system} --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine ++@end example ++ ++See also @url{https://sheepdog.github.io/sheepdog/}. ++ ++@item GlusterFS ++GlusterFS is a user space distributed file system. ++QEMU supports the use of GlusterFS volumes for hosting VM disk images using ++TCP, Unix Domain Sockets and RDMA transport protocols. ++ ++Syntax for specifying a VM disk image on GlusterFS volume is ++@example ++ ++URI: ++gluster[+type]://[host[:port]]/volume/path[?socket=...][,debug=N][,logfile=...] 
++ ++JSON: ++'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":N,"logfile":"...", ++@ "server":[@{"type":"tcp","host":"...","port":"..."@}, ++@ @{"type":"unix","socket":"..."@}]@}@}' ++@end example ++ ++ ++Example ++@example ++URI: ++@value{qemu_system} --drive file=gluster://192.0.2.1/testvol/a.img, ++@ file.debug=9,file.logfile=/var/log/qemu-gluster.log ++ ++JSON: ++@value{qemu_system} 'json:@{"driver":"qcow2", ++@ "file":@{"driver":"gluster", ++@ "volume":"testvol","path":"a.img", ++@ "debug":9,"logfile":"/var/log/qemu-gluster.log", ++@ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, ++@ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' ++@value{qemu_system} -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, ++@ file.debug=9,file.logfile=/var/log/qemu-gluster.log, ++@ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, ++@ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket ++@end example ++ ++See also @url{http://www.gluster.org}. ++ ++@item HTTP/HTTPS/FTP/FTPS ++QEMU supports read-only access to files accessed over http(s) and ftp(s). ++ ++Syntax using a single filename: ++@example ++://[[:]@@]/ ++@end example ++ ++where: ++@table @option ++@item protocol ++'http', 'https', 'ftp', or 'ftps'. ++ ++@item username ++Optional username for authentication to the remote server. ++ ++@item password ++Optional password for authentication to the remote server. ++ ++@item host ++Address of the remote server. ++ ++@item path ++Path on the remote server, including any query string. ++@end table ++ ++The following options are also supported: ++@table @option ++@item url ++The full URL when passing options to the driver explicitly. ++ ++@item readahead ++The amount of data to read ahead with each range request to the remote server. ++This value may optionally have the suffix 'T', 'G', 'M', 'K', 'k' or 'b'. 
If it ++does not have a suffix, it will be assumed to be in bytes. The value must be a ++multiple of 512 bytes. It defaults to 256k. ++ ++@item sslverify ++Whether to verify the remote server's certificate when connecting over SSL. It ++can have the value 'on' or 'off'. It defaults to 'on'. ++ ++@item cookie ++Send this cookie (it can also be a list of cookies separated by ';') with ++each outgoing request. Only supported when using protocols such as HTTP ++which support cookies, otherwise ignored. ++ ++@item timeout ++Set the timeout in seconds of the CURL connection. This timeout is the time ++that CURL waits for a response from the remote server to get the size of the ++image to be downloaded. If not set, the default timeout of 5 seconds is used. ++@end table ++ ++Note that when passing options to qemu explicitly, @option{driver} is the value ++of . ++ ++Example: boot from a remote Fedora 20 live ISO image ++@example ++@value{qemu_system_x86} --drive media=cdrom,file=https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++ ++@value{qemu_system_x86} --drive media=cdrom,file.driver=http,file.url=http://archives.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++@end example ++ ++Example: boot from a remote Fedora 20 cloud image using a local overlay for ++writes, copy-on-read, and a readahead of 64k ++@example ++qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"http://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 ++ ++@value{qemu_system_x86} -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on ++@end example ++ ++Example: boot from an image stored on a VMware vSphere server with a self-signed ++certificate using a local overlay for writes, a 
readahead of 64k and a timeout ++of 10 seconds. ++@example ++qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 ++ ++@value{qemu_system_x86} -drive file=/tmp/test.qcow2 ++@end example ++ ++@end table ++ ++@c man end ++ ++@node pcsys_keys ++@section Keys in the graphical frontends ++ ++@c man begin OPTIONS ++ ++During the graphical emulation, you can use special key combinations to change ++modes. The default key mappings are shown below, but if you use @code{-alt-grab} ++then the modifier is Ctrl-Alt-Shift (instead of Ctrl-Alt) and if you use ++@code{-ctrl-grab} then the modifier is the right Ctrl key (instead of Ctrl-Alt): ++ ++@table @key ++@item Ctrl-Alt-f ++@kindex Ctrl-Alt-f ++Toggle full screen ++ ++@item Ctrl-Alt-+ ++@kindex Ctrl-Alt-+ ++Enlarge the screen ++ ++@item Ctrl-Alt-- ++@kindex Ctrl-Alt-- ++Shrink the screen ++ ++@item Ctrl-Alt-u ++@kindex Ctrl-Alt-u ++Restore the screen's un-scaled dimensions ++ ++@item Ctrl-Alt-n ++@kindex Ctrl-Alt-n ++Switch to virtual console 'n'. Standard console mappings are: ++@table @emph ++@item 1 ++Target system display ++@item 2 ++Monitor ++@item 3 ++Serial port ++@end table ++ ++@item Ctrl-Alt ++@kindex Ctrl-Alt ++Toggle mouse and keyboard grab. ++@end table ++ ++@kindex Ctrl-Up ++@kindex Ctrl-Down ++@kindex Ctrl-PageUp ++@kindex Ctrl-PageDown ++In the virtual consoles, you can use @key{Ctrl-Up}, @key{Ctrl-Down}, ++@key{Ctrl-PageUp} and @key{Ctrl-PageDown} to move in the back log. ++ ++@c man end ++ ++@node mux_keys ++@section Keys in the character backend multiplexer ++ ++@c man begin OPTIONS ++ ++During emulation, if you are using a character backend multiplexer ++(which is the default if you are using @option{-nographic}) then ++several commands are available via an escape sequence. 
These ++key sequences all start with an escape character, which is @key{Ctrl-a} ++by default, but can be changed with @option{-echr}. The list below assumes ++you're using the default. ++ ++@table @key ++@item Ctrl-a h ++@kindex Ctrl-a h ++Print this help ++@item Ctrl-a x ++@kindex Ctrl-a x ++Exit emulator ++@item Ctrl-a s ++@kindex Ctrl-a s ++Save disk data back to file (if -snapshot) ++@item Ctrl-a t ++@kindex Ctrl-a t ++Toggle console timestamps ++@item Ctrl-a b ++@kindex Ctrl-a b ++Send break (magic sysrq in Linux) ++@item Ctrl-a c ++@kindex Ctrl-a c ++Rotate between the frontends connected to the multiplexer (usually ++this switches between the monitor and the console) ++@item Ctrl-a Ctrl-a ++@kindex Ctrl-a Ctrl-a ++Send the escape character to the frontend ++@end table ++@c man end ++ ++@ignore ++ ++@c man begin SEEALSO ++The HTML documentation of QEMU for more precise information and Linux ++user mode emulator invocation. ++@c man end ++ ++@c man begin AUTHOR ++Fabrice Bellard ++@c man end ++ ++@end ignore ++ ++@node pcsys_monitor ++@section QEMU Monitor ++@cindex QEMU monitor ++ ++The QEMU monitor is used to give complex commands to the QEMU ++emulator. You can use it to: ++ ++@itemize @minus ++ ++@item ++Remove or insert removable media images ++(such as CD-ROM or floppies). ++ ++@item ++Freeze/unfreeze the Virtual Machine (VM) and save or restore its state ++from a disk file. ++ ++@item Inspect the VM state without an external debugger. ++ ++@end itemize ++ ++@subsection Commands ++ ++The following commands are available: ++ ++@include qemu-monitor.texi ++ ++@include qemu-monitor-info.texi ++ ++@subsection Integer expressions ++ ++The monitor understands integer expressions for every integer ++argument. You can use register names to get the value of specific ++CPU registers by prefixing them with @emph{$}.
++ ++@node cpu_models ++@section CPU models ++ ++@include docs/qemu-cpu-models.texi ++ ++@node disk_images ++@section Disk Images ++ ++QEMU supports many disk image formats, including growable disk images ++(their size increases as non-empty sectors are written), compressed and ++encrypted disk images. ++ ++@menu ++* disk_images_quickstart:: Quick start for disk image creation ++* disk_images_snapshot_mode:: Snapshot mode ++* vm_snapshots:: VM snapshots ++@end menu ++ ++@node disk_images_quickstart ++@subsection Quick start for disk image creation ++ ++You can create a disk image with the command: ++@example ++qemu-img create myimage.img mysize ++@end example ++where @var{myimage.img} is the disk image filename and @var{mysize} is its ++size in kilobytes. You can add an @code{M} suffix to give the size in ++megabytes and a @code{G} suffix for gigabytes. ++ ++@c When this document is converted to rst we should make this into ++@c a proper linked reference to the qemu-img documentation again: ++See the qemu-img invocation documentation for more information. ++ ++@node disk_images_snapshot_mode ++@subsection Snapshot mode ++ ++If you use the option @option{-snapshot}, all disk images are ++considered as read only. When sectors are written, they are written to ++a temporary file created in @file{/tmp}. You can however force the ++write back to the raw disk images by using the @code{commit} monitor ++command (or @key{C-a s} in the serial console). ++ ++@node vm_snapshots ++@subsection VM snapshots ++ ++VM snapshots are snapshots of the complete virtual machine including ++CPU state, RAM, device state and the content of all the writable ++disks. In order to use VM snapshots, you must have at least one non ++removable and writable block device using the @code{qcow2} disk image ++format. Normally this device is the first virtual hard drive. ++ ++Use the monitor command @code{savevm} to create a new VM snapshot or ++replace an existing one.
A human readable name can be assigned to each ++snapshot in addition to its numerical ID. ++ ++Use @code{loadvm} to restore a VM snapshot and @code{delvm} to remove ++a VM snapshot. @code{info snapshots} lists the available snapshots ++with their associated information: ++ ++@example ++(qemu) info snapshots ++Snapshot devices: hda ++Snapshot list (from hda): ++ID TAG VM SIZE DATE VM CLOCK ++1 start 41M 2006-08-06 12:38:02 00:00:14.954 ++2 40M 2006-08-06 12:43:29 00:00:18.633 ++3 msys 40M 2006-08-06 12:44:04 00:00:23.514 ++@end example ++ ++A VM snapshot is made of a VM state info (its size is shown in ++@code{info snapshots}) and a snapshot of every writable disk image. ++The VM state info is stored in the first @code{qcow2} non removable ++and writable block device. The disk image snapshots are stored in ++every disk image. The size of a snapshot in a disk image is difficult ++to evaluate and is not shown by @code{info snapshots} because the ++associated disk sectors are shared among all the snapshots to save ++disk space (otherwise each snapshot would need a full copy of all the ++disk images). ++ ++When using the (unrelated) @code{-snapshot} option ++(@ref{disk_images_snapshot_mode}), you can always make VM snapshots, ++but they are deleted as soon as you exit QEMU. ++ ++VM snapshots currently have the following known limitations: ++@itemize ++@item ++They cannot cope with removable devices if they are removed or ++inserted after a snapshot is done. ++@item ++A few device drivers still have incomplete snapshot support so their ++state is not saved or restored properly (in particular USB). ++@end itemize ++ ++@node pcsys_network ++@section Network emulation ++ ++QEMU can simulate several network cards (e.g. PCI or ISA cards on the PC ++target) and can connect them to a network backend on the host or an emulated ++hub. The various host network backends can either be used to connect the NIC of ++the guest to a real network (e.g. 
by using TAP devices or the non-privileged ++user mode network stack), or to other guest instances running in another QEMU ++process (e.g. by using the socket host network backend). ++ ++@subsection Using TAP network interfaces ++ ++This is the standard way to connect QEMU to a real network. QEMU adds ++a virtual network device on your host (called @code{tapN}), and you ++can then configure it as if it was a real ethernet card. ++ ++@subsubsection Linux host ++ ++As an example, you can download the @file{linux-test-xxx.tar.gz} ++archive and copy the script @file{qemu-ifup} in @file{/etc} and ++properly configure @code{sudo} so that the command @code{ifconfig} ++contained in @file{qemu-ifup} can be executed as root. You must verify ++that your host kernel supports the TAP network interfaces: the ++device @file{/dev/net/tun} must be present. ++ ++See @ref{sec_invocation} to have examples of command lines using the ++TAP network interfaces. ++ ++@subsubsection Windows host ++ ++There is a virtual ethernet driver for Windows 2000/XP systems, called ++TAP-Win32. But it is not included in standard QEMU for Windows, ++so you will need to get it separately. It is part of the OpenVPN package, ++so download OpenVPN from: @url{https://openvpn.net/}. ++ ++@subsection Using the user mode network stack ++ ++By using the option @option{-net user} (default configuration if no ++@option{-net} option is specified), QEMU uses a completely user mode ++network stack (you don't need root privilege to use the virtual ++network). The virtual network configuration is the following: ++ ++@example ++ ++ guest (10.0.2.15) <------> Firewall/DHCP server <-----> Internet ++ | (10.0.2.2) ++ | ++ ----> DNS server (10.0.2.3) ++ | ++ ----> SMB server (10.0.2.4) ++@end example ++ ++The QEMU VM behaves as if it was behind a firewall which blocks all ++incoming connections. You can use a DHCP client to automatically ++configure the network in the QEMU VM.
The DHCP server assigns addresses ++to the hosts starting from 10.0.2.15. ++ ++In order to check that the user mode network is working, you can ping ++the address 10.0.2.2 and verify that you got an address in the range ++10.0.2.x from the QEMU virtual DHCP server. ++ ++Note that ICMP traffic in general does not work with user mode networking. ++@code{ping}, a.k.a. ICMP echo, to the local router (10.0.2.2) shall work, ++however. If you're using QEMU on Linux >= 3.0, it can use unprivileged ICMP ++ping sockets to allow @code{ping} to the Internet. The host admin has to set ++the ping_group_range in order to grant access to those sockets. To allow ping ++for GID 100 (usually users group): ++ ++@example ++echo 100 100 > /proc/sys/net/ipv4/ping_group_range ++@end example ++ ++When using the built-in TFTP server, the router is also the TFTP ++server. ++ ++When using the @option{'-netdev user,hostfwd=...'} option, TCP or UDP ++connections can be redirected from the host to the guest. This allows, for ++example, redirecting X11, telnet or SSH connections. ++ ++@subsection Hubs ++ ++QEMU can simulate several hubs. A hub can be thought of as a virtual connection ++between several network devices. These devices can be for example QEMU virtual ++ethernet cards or virtual Host ethernet devices (TAP devices). You can connect ++guest NICs or host network backends to such a hub using the @option{-netdev ++hubport} or @option{-nic hubport} options. The legacy @option{-net} option ++also connects the given device to the emulated hub with ID 0 (i.e. the default ++hub) unless you specify a netdev with @option{-net nic,netdev=xxx} here. ++ ++@subsection Connecting emulated networks between QEMU instances ++ ++Using the @option{-netdev socket} (or @option{-nic socket} or ++@option{-net socket}) option, it is possible to create emulated ++networks that span several QEMU instances.
++See the description of the @option{-netdev socket} option in the ++@ref{sec_invocation,,Invocation chapter} to have a basic example. ++ ++@node pcsys_other_devs ++@section Other Devices ++ ++@subsection Inter-VM Shared Memory device ++ ++On Linux hosts, a shared memory device is available. The basic syntax ++is: ++ ++@example ++@value{qemu_system_x86} -device ivshmem-plain,memdev=@var{hostmem} ++@end example ++ ++where @var{hostmem} names a host memory backend. For a POSIX shared ++memory backend, use something like ++ ++@example ++-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem} ++@end example ++ ++If desired, interrupts can be sent between guest VMs accessing the same shared ++memory region. Interrupt support requires using a shared memory server and ++using a chardev socket to connect to it. The code for the shared memory server ++is qemu.git/contrib/ivshmem-server. An example syntax when using the shared ++memory server is: ++ ++@example ++# First start the ivshmem server once and for all ++ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} ++ ++# Then start your qemu instances with matching arguments ++@value{qemu_system_x86} -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} ++ -chardev socket,path=@var{path},id=@var{id} ++@end example ++ ++When using the server, the guest will be assigned a VM ID (>=0) that allows guests ++using the same server to communicate via interrupts. Guests can read their ++VM ID from a device register (see ivshmem-spec.txt). ++ ++@subsubsection Migration with ivshmem ++ ++With device property @option{master=on}, the guest will copy the shared ++memory on migration to the destination host. With @option{master=off}, ++the guest will not be able to migrate with the device attached. In the ++latter case, the device should be detached and then reattached after ++migration using the PCI hotplug support. 
++ ++At most one of the devices sharing the same memory can be master. The ++master must complete migration before you plug back the other devices. ++ ++@subsubsection ivshmem and hugepages ++ ++Instead of specifying the <shm size> using POSIX shm, you may specify ++a memory backend that has hugepage support: ++ ++@example ++@value{qemu_system_x86} -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 ++ -device ivshmem-plain,memdev=mb1 ++@end example ++ ++ivshmem-server also supports hugepages mount points with the ++@option{-m} memory path argument. ++ ++@node direct_linux_boot ++@section Direct Linux Boot ++ ++This section explains how to launch a Linux kernel inside QEMU without ++having to make a full bootable image. It is very useful for fast Linux ++kernel testing. ++ ++The syntax is: ++@example ++@value{qemu_system} -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" ++@end example ++ ++Use @option{-kernel} to provide the Linux kernel image and ++@option{-append} to give the kernel command line arguments. The ++@option{-initrd} option can be used to provide an INITRD image. ++ ++If you do not need graphical output, you can disable it and redirect ++the virtual serial port and the QEMU monitor to the console with the ++@option{-nographic} option. The typical command line is: ++@example ++@value{qemu_system} -kernel bzImage -hda rootdisk.img \ ++ -append "root=/dev/hda console=ttyS0" -nographic ++@end example ++ ++Use @key{Ctrl-a c} to switch between the serial console and the ++monitor (@pxref{pcsys_keys}). ++ ++@node pcsys_usb ++@section USB emulation ++ ++QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can ++plug virtual USB devices or real host USB devices (only works with certain ++host operating systems). QEMU will automatically create and connect virtual ++USB hubs as necessary to connect multiple USB devices.
++ ++@menu ++* usb_devices:: ++* host_usb_devices:: ++@end menu ++@node usb_devices ++@subsection Connecting USB devices ++ ++USB devices can be connected with the @option{-device usb-...} command line ++option or the @code{device_add} monitor command. Available devices are: ++ ++@table @code ++@item usb-mouse ++Virtual Mouse. This will override the PS/2 mouse emulation when activated. ++@item usb-tablet ++Pointer device that uses absolute coordinates (like a touchscreen). ++This means QEMU is able to report the mouse position without having ++to grab the mouse. Also overrides the PS/2 mouse emulation when activated. ++@item usb-storage,drive=@var{drive_id} ++Mass storage device backed by @var{drive_id} (@pxref{disk_images}) ++@item usb-uas ++USB attached SCSI device, see ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} ++for details ++@item usb-bot ++Bulk-only transport storage device, see ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} ++for details here, too ++@item usb-mtp,rootdir=@var{dir} ++Media transfer protocol device, using @var{dir} as root of the file tree ++that is presented to the guest. ++@item usb-host,hostbus=@var{bus},hostaddr=@var{addr} ++Pass through the host device identified by @var{bus} and @var{addr} ++@item usb-host,vendorid=@var{vendor},productid=@var{product} ++Pass through the host device identified by @var{vendor} and @var{product} ID ++@item usb-wacom-tablet ++Virtual Wacom PenPartner tablet. This device is similar to the @code{tablet} ++above but it can be used with the tslib library because in addition to touch ++coordinates it reports touch pressure. ++@item usb-kbd ++Standard USB keyboard. Will override the PS/2 keyboard (if present). ++@item usb-serial,chardev=@var{id} ++Serial converter. This emulates an FTDI FT232BM chip connected to host character ++device @var{id}. ++@item usb-braille,chardev=@var{id} ++Braille device. 
This will use BrlAPI to display the braille output on a real ++or fake device referenced by @var{id}. ++@item usb-net[,netdev=@var{id}] ++Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} ++specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. ++For instance, user-mode networking can be used with ++@example ++@value{qemu_system} [...] -netdev user,id=net0 -device usb-net,netdev=net0 ++@end example ++@item usb-ccid ++Smartcard reader device ++@item usb-audio ++USB audio device ++@end table ++ ++@node host_usb_devices ++@subsection Using host USB devices on a Linux host ++ ++WARNING: this is an experimental feature. QEMU will slow down when ++using it. USB devices requiring real time streaming (e.g. USB Video ++Cameras) are not supported yet. ++ ++@enumerate ++@item If you use an early Linux 2.4 kernel, verify that no Linux driver ++is actually using the USB device. A simple way to do that is simply to ++disable the corresponding kernel module by renaming it from @file{mydriver.o} ++to @file{mydriver.o.disabled}. ++ ++@item Verify that @file{/proc/bus/usb} is working (most Linux distributions should enable it by default). You should see something like this: ++@example ++ls /proc/bus/usb ++001 devices drivers ++@end example ++ ++@item Since only root can access the USB devices directly, you can either launch QEMU as root or change the permissions of the USB devices you want to use. For testing, the following suffices: ++@example ++chown -R myuid /proc/bus/usb ++@end example ++ ++@item Launch QEMU and run the following in the monitor: ++@example ++info usbhost ++ Device 1.2, speed 480 Mb/s ++ Class 00: USB device 1234:5678, USB DISK ++@end example ++You should see the list of the devices you can use (Never try to use ++hubs, it won't work). ++ ++@item Add the device in QEMU by using: ++@example ++device_add usb-host,vendorid=0x1234,productid=0x5678 ++@end example ++ ++Normally the guest OS should report that a new USB device is plugged.
++You can use the option @option{-device usb-host,...} to do the same. ++ ++@item Now you can try to use the host USB device in QEMU. ++ ++@end enumerate ++ ++When relaunching QEMU, you may have to unplug and plug again the USB ++device to make it work again (this is a bug). ++ ++@node vnc_security ++@section VNC security ++ ++The VNC server capability provides access to the graphical console ++of the guest VM across the network. This has a number of security ++considerations depending on the deployment scenarios. ++ ++@menu ++* vnc_sec_none:: ++* vnc_sec_password:: ++* vnc_sec_certificate:: ++* vnc_sec_certificate_verify:: ++* vnc_sec_certificate_pw:: ++* vnc_sec_sasl:: ++* vnc_sec_certificate_sasl:: ++* vnc_setup_sasl:: ++@end menu ++@node vnc_sec_none ++@subsection Without passwords ++ ++The simplest VNC server setup does not include any form of authentication. ++For this setup it is recommended to restrict it to listen on a UNIX domain ++socket only. For example ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc ++@end example ++ ++This ensures that only users on local box with read/write access to that ++path can access the VNC server. To securely access the VNC server from a ++remote machine, a combination of netcat+ssh can be used to provide a secure ++tunnel. ++ ++@node vnc_sec_password ++@subsection With passwords ++ ++The VNC protocol has limited support for password based authentication. Since ++the protocol limits passwords to 8 characters it should not be considered ++to provide high security. The password can be fairly easily brute-forced by ++a client making repeat connections. For this reason, a VNC server using password ++authentication should be restricted to only listen on the loopback interface ++or UNIX domain sockets. Password authentication is not supported when operating ++in FIPS 140-2 compliance mode as it requires the use of the DES cipher. 
Password ++authentication is requested with the @code{password} option, and then once QEMU ++is running the password is set with the monitor. Until the monitor is used to ++set the password all clients will be rejected. ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc :1,password -monitor stdio ++(qemu) change vnc password ++Password: ******** ++(qemu) ++@end example ++ ++@node vnc_sec_certificate ++@subsection With x509 certificates ++ ++The QEMU VNC server also implements the VeNCrypt extension allowing use of ++TLS for encryption of the session, and x509 certificates for authentication. ++The use of x509 certificates is strongly recommended, because TLS on its ++own is susceptible to man-in-the-middle attacks. Basic x509 certificate ++support provides a secure session, but no authentication. This allows any ++client to connect, and provides an encrypted session. ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ ++ -vnc :1,tls-creds=tls0 -monitor stdio ++@end example ++ ++In the above example @code{/etc/pki/qemu} should contain at least three files, ++@code{ca-cert.pem}, @code{server-cert.pem} and @code{server-key.pem}. Unprivileged ++users will want to use a private directory, for example @code{$HOME/.pki/qemu}. ++NB the @code{server-key.pem} file should be protected with file mode 0600 to ++only be readable by the user owning it. ++ ++@node vnc_sec_certificate_verify ++@subsection With x509 certificates and client verification ++ ++Certificates can also provide a means to authenticate the client connecting. ++The server will request that the client provide a certificate, which it will ++then validate against the CA certificate. This is a good choice if deploying ++in an environment with a private internal certificate authority. It uses the ++same syntax as previously, but with @code{verify-peer} set to @code{yes} ++instead. 
++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0 -monitor stdio ++@end example ++ ++ ++@node vnc_sec_certificate_pw ++@subsection With x509 certificates, client verification and passwords ++ ++Finally, the previous method can be combined with VNC password authentication ++to provide two layers of authentication for clients. ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0,password -monitor stdio ++(qemu) change vnc password ++Password: ******** ++(qemu) ++@end example ++ ++ ++@node vnc_sec_sasl ++@subsection With SASL authentication ++ ++The SASL authentication method is a VNC extension that provides an ++easily extendable, pluggable authentication method. This allows for ++integration with a wide range of authentication mechanisms, such as ++PAM, GSSAPI/Kerberos, LDAP, SQL databases, one-time keys and more. ++The strength of the authentication depends on the exact mechanism ++configured. If the chosen mechanism also provides an SSF layer, then ++it will encrypt the datastream as well. ++ ++Refer to the later docs on how to choose the exact SASL mechanism ++used for authentication, but assuming use of one supporting SSF, ++then QEMU can be launched with: ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc :1,sasl -monitor stdio ++@end example ++ ++@node vnc_sec_certificate_sasl ++@subsection With x509 certificates and SASL authentication ++ ++If the desired SASL authentication mechanism does not support ++SSF layers, then it is strongly advised to run it in combination ++with TLS and x509 certificates. This provides a securely encrypted ++data stream, avoiding the risk of compromising the security ++credentials.
This can be enabled, by combining the 'sasl' option ++with the aforementioned TLS + x509 options: ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0,sasl -monitor stdio ++@end example ++ ++@node vnc_setup_sasl ++ ++@subsection Configuring SASL mechanisms ++ ++The following documentation assumes use of the Cyrus SASL implementation on a ++Linux host, but the principles should apply to any other SASL implementation ++or host. When SASL is enabled, the mechanism configuration will be loaded from ++system default SASL service config /etc/sasl2/qemu.conf. If running QEMU as an ++unprivileged user, an environment variable SASL_CONF_PATH can be used to make ++it search alternate locations for the service config file. ++ ++If the TLS option is enabled for VNC, then it will provide session encryption, ++otherwise the SASL mechanism will have to provide encryption. In the latter ++case the list of possible plugins that can be used is drastically reduced. In ++fact only the GSSAPI SASL mechanism provides an acceptable level of security ++by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 ++mechanism, however, it has multiple serious flaws described in detail in ++RFC 6331 and thus should never be used any more. The SCRAM-SHA-1 mechanism ++provides a simple username/password auth facility similar to DIGEST-MD5, but ++does not support session encryption, so can only be used in combination with ++TLS. ++ ++When not using TLS the recommended configuration is ++ ++@example ++mech_list: gssapi ++keytab: /etc/qemu/krb5.tab ++@end example ++ ++This says to use the 'GSSAPI' mechanism with the Kerberos v5 protocol, with ++the server principal stored in /etc/qemu/krb5.tab. 
For this to work the ++administrator of your KDC must generate a Kerberos principal for the server, ++with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' replacing ++'somehost.example.com' with the fully qualified host name of the machine ++running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm. ++ ++When using TLS, if username+password authentication is desired, then a ++reasonable configuration is ++ ++@example ++mech_list: scram-sha-1 ++sasldb_path: /etc/qemu/passwd.db ++@end example ++ ++The @code{saslpasswd2} program can be used to populate the @code{passwd.db} ++file with accounts. ++ ++Other SASL configurations will be left as an exercise for the reader. Note that ++all mechanisms, except GSSAPI, should be combined with use of TLS to ensure a ++secure data channel. ++ ++ ++@node network_tls ++@section TLS setup for network services ++ ++Almost all network services in QEMU have the ability to use TLS for ++session data encryption, along with x509 certificates for simple ++client authentication. What follows is a description of how to ++generate certificates suitable for usage with QEMU, and applies to ++the VNC server, character devices with the TCP backend, NBD server ++and client, and migration server and client. ++ ++At a high level, QEMU requires certificates and private keys to be ++provided in PEM format. Aside from the core fields, the certificates ++should include various extension data sets, including v3 basic ++constraints data, key purpose, key usage and subject alt name. ++ ++The GnuTLS package includes a command called @code{certtool} which can ++be used to easily generate certificates and keys in the required format ++with expected data present. Alternatively a certificate management ++service may be used. ++ ++At a minimum it is necessary to setup a certificate authority, and ++issue certificates to each server. If using x509 certificates for ++authentication, then each client will also need to be issued a ++certificate. 
++ ++Assuming that the QEMU network services will only ever be exposed to ++clients on a private intranet, there is no need to use a commercial ++certificate authority to create certificates. A self-signed CA is ++sufficient, and in fact likely to be more secure since it removes ++the ability of malicious 3rd parties to trick the CA into mis-issuing ++certs for impersonating your services. The only likely exception ++where a commercial CA might be desirable is if enabling the VNC ++websockets server and exposing it directly to remote browser clients. ++In such a case it might be useful to use a commercial CA to avoid ++needing to install custom CA certs in the web browsers. ++ ++The recommendation is for the server to keep its certificates in either ++@code{/etc/pki/qemu} or for unprivileged users in @code{$HOME/.pki/qemu}. ++ ++@menu ++* tls_generate_ca:: ++* tls_generate_server:: ++* tls_generate_client:: ++* tls_creds_setup:: ++* tls_psk:: ++@end menu ++@node tls_generate_ca ++@subsection Setup the Certificate Authority ++ ++This step only needs to be performed once per organization / organizational ++unit. First the CA needs a private key. This key must be kept VERY secret ++and secure. If this key is compromised the entire trust chain of the certificates ++issued with it is lost. ++ ++@example ++# certtool --generate-privkey > ca-key.pem ++@end example ++ ++To generate a self-signed certificate requires one core piece of information, ++the name of the organization. 
A template file @code{ca.info} should be ++populated with the desired data to avoid having to deal with interactive ++prompts from certtool: ++@example ++# cat > ca.info < server-hostNNN.info < server-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey server-hostNNN-key.pem \ ++ --template server-hostNNN.info \ ++ --outfile server-hostNNN-cert.pem ++@end example ++ ++The @code{dns_name} and @code{ip_address} fields in the template are setting ++the subject alt name extension data. The @code{tls_www_server} keyword is the ++key purpose extension to indicate this certificate is intended for usage in ++a web server. Although QEMU network services are not in fact HTTP servers ++(except for VNC websockets), setting this key purpose is still recommended. ++The @code{encryption_key} and @code{signing_key} keyword is the key usage ++extension to indicate this certificate is intended for usage in the data ++session. ++ ++The @code{server-hostNNN-key.pem} and @code{server-hostNNN-cert.pem} files ++should now be securely copied to the server for which they were generated, ++and renamed to @code{server-key.pem} and @code{server-cert.pem} when added ++to the @code{/etc/pki/qemu} directory on the target host. The @code{server-key.pem} ++file is security sensitive and should be kept protected with file mode 0600 ++to prevent disclosure. ++ ++@node tls_generate_client ++@subsection Issuing client certificates ++ ++The QEMU x509 TLS credential setup defaults to enabling client verification ++using certificates, providing a simple authentication mechanism. If this ++default is used, each client also needs to be issued a certificate. The client ++certificate contains enough metadata to uniquely identify the client with the ++scope of the certificate authority. The client certificate would typically ++include fields for organization, state, city, building, etc. 
++ ++Once again on the host holding the CA, create template files containing the ++information for each client, and use it to issue client certificates. ++ ++ ++@example ++# cat > client-hostNNN.info < client-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey client-hostNNN-key.pem \ ++ --template client-hostNNN.info \ ++ --outfile client-hostNNN-cert.pem ++@end example ++ ++The subject alt name extension data is not required for clients, so the ++the @code{dns_name} and @code{ip_address} fields are not included. ++The @code{tls_www_client} keyword is the key purpose extension to indicate ++this certificate is intended for usage in a web client. Although QEMU ++network clients are not in fact HTTP clients, setting this key purpose is ++still recommended. The @code{encryption_key} and @code{signing_key} keyword ++is the key usage extension to indicate this certificate is intended for ++usage in the data session. ++ ++The @code{client-hostNNN-key.pem} and @code{client-hostNNN-cert.pem} files ++should now be securely copied to the client for which they were generated, ++and renamed to @code{client-key.pem} and @code{client-cert.pem} when added ++to the @code{/etc/pki/qemu} directory on the target host. The @code{client-key.pem} ++file is security sensitive and should be kept protected with file mode 0600 ++to prevent disclosure. ++ ++If a single host is going to be using TLS in both a client and server ++role, it is possible to create a single certificate to cover both roles. ++This would be quite common for the migration and NBD services, where a ++QEMU process will be started by accepting a TLS protected incoming migration, ++and later itself be migrated out to another host. To generate a single ++certificate, simply include the template data from both the client and server ++instructions in one. 
++ ++@example ++# cat > both-hostNNN.info < both-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey both-hostNNN-key.pem \ ++ --template both-hostNNN.info \ ++ --outfile both-hostNNN-cert.pem ++@end example ++ ++When copying the PEM files to the target host, save them twice, ++once as @code{server-cert.pem} and @code{server-key.pem}, and ++again as @code{client-cert.pem} and @code{client-key.pem}. ++ ++@node tls_creds_setup ++@subsection TLS x509 credential configuration ++ ++QEMU has a standard mechanism for loading x509 credentials that will be ++used for network services and clients. It requires specifying the ++@code{tls-creds-x509} class name to the @code{--object} command line ++argument for the system emulators. Each set of credentials loaded should ++be given a unique string identifier via the @code{id} parameter. A single ++set of TLS credentials can be used for multiple network backends, so VNC, ++migration, NBD, character devices can all share the same credentials. Note, ++however, that credentials for use in a client endpoint must be loaded ++separately from those used in a server endpoint. ++ ++When specifying the object, the @code{dir} parameters specifies which ++directory contains the credential files. This directory is expected to ++contain files with the names mentioned previously, @code{ca-cert.pem}, ++@code{server-key.pem}, @code{server-cert.pem}, @code{client-key.pem} ++and @code{client-cert.pem} as appropriate. It is also possible to ++include a set of pre-generated Diffie-Hellman (DH) parameters in a file ++@code{dh-params.pem}, which can be created using the ++@code{certtool --generate-dh-params} command. If omitted, QEMU will ++dynamically generate DH parameters when loading the credentials. 
++ ++The @code{endpoint} parameter indicates whether the credentials will ++be used for a network client or server, and determines which PEM ++files are loaded. ++ ++The @code{verify} parameter determines whether x509 certificate ++validation should be performed. This defaults to enabled, meaning ++clients will always validate the server hostname against the ++certificate subject alt name fields and/or CN field. It also ++means that servers will request that clients provide a certificate ++and validate them. Verification should never be turned off for ++client endpoints, however, it may be turned off for server endpoints ++if an alternative mechanism is used to authenticate clients. For ++example, the VNC server can use SASL to authenticate clients ++instead. ++ ++To load server credentials with client certificate validation ++enabled ++ ++@example ++@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server ++@end example ++ ++while to load client credentials use ++ ++@example ++@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=client ++@end example ++ ++Network services which support TLS will all have a @code{tls-creds} ++parameter which expects the ID of the TLS credentials object. For ++example with VNC: ++ ++@example ++@value{qemu_system} -vnc 0.0.0.0:0,tls-creds=tls0 ++@end example ++ ++@node tls_psk ++@subsection TLS Pre-Shared Keys (PSK) ++ ++Instead of using certificates, you may also use TLS Pre-Shared Keys ++(TLS-PSK). This can be simpler to set up than certificates but is ++less scalable. 
++ ++Use the GnuTLS @code{psktool} program to generate a @code{keys.psk} ++file containing one or more usernames and random keys: ++ ++@example ++mkdir -m 0700 /tmp/keys ++psktool -u rich -p /tmp/keys/keys.psk ++@end example ++ ++TLS-enabled servers such as qemu-nbd can use this directory like so: ++ ++@example ++qemu-nbd \ ++ -t -x / \ ++ --object tls-creds-psk,id=tls0,endpoint=server,dir=/tmp/keys \ ++ --tls-creds tls0 \ ++ image.qcow2 ++@end example ++ ++When connecting from a qemu-based client you must specify the ++directory containing @code{keys.psk} and an optional @var{username} ++(defaults to ``qemu''): ++ ++@example ++qemu-img info \ ++ --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=rich,endpoint=client \ ++ --image-opts \ ++ file.driver=nbd,file.host=localhost,file.port=10809,file.tls-creds=tls0,file.export=/ ++@end example ++ ++@node gdb_usage ++@section GDB usage ++ ++QEMU has a primitive support to work with gdb, so that you can do ++'Ctrl-C' while the virtual machine is running and inspect its state. ++ ++In order to use gdb, launch QEMU with the '-s' option. It will wait for a ++gdb connection: ++@example ++@value{qemu_system} -s -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" ++Connected to host network interface: tun0 ++Waiting gdb connection on port 1234 ++@end example ++ ++Then launch gdb on the 'vmlinux' executable: ++@example ++> gdb vmlinux ++@end example ++ ++In gdb, connect to QEMU: ++@example ++(gdb) target remote localhost:1234 ++@end example ++ ++Then you can use gdb normally. For example, type 'c' to launch the kernel: ++@example ++(gdb) c ++@end example ++ ++Here are some useful tips in order to use gdb on system code: ++ ++@enumerate ++@item ++Use @code{info reg} to display all the CPU registers. ++@item ++Use @code{x/10i $eip} to display the code at the PC position. ++@item ++Use @code{set architecture i8086} to dump 16 bit code. Then use ++@code{x/10i $cs*16+$eip} to dump the code at the PC position. 
++@end enumerate ++ ++Advanced debugging options: ++ ++The default single stepping behavior is step with the IRQs and timer service routines off. It is set this way because when gdb executes a single step it expects to advance beyond the current instruction. With the IRQs and timer service routines on, a single step might jump into the one of the interrupt or exception vectors instead of executing the current instruction. This means you may hit the same breakpoint a number of times before executing the instruction gdb wants to have executed. Because there are rare circumstances where you want to single step into an interrupt vector the behavior can be controlled from GDB. There are three commands you can query and set the single step behavior: ++@table @code ++@item maintenance packet qqemu.sstepbits ++ ++This will display the MASK bits used to control the single stepping IE: ++@example ++(gdb) maintenance packet qqemu.sstepbits ++sending: "qqemu.sstepbits" ++received: "ENABLE=1,NOIRQ=2,NOTIMER=4" ++@end example ++@item maintenance packet qqemu.sstep ++ ++This will display the current value of the mask used when single stepping IE: ++@example ++(gdb) maintenance packet qqemu.sstep ++sending: "qqemu.sstep" ++received: "0x7" ++@end example ++@item maintenance packet Qqemu.sstep=HEX_VALUE ++ ++This will change the single step mask, so if wanted to enable IRQs on the single step, but not timers, you would use: ++@example ++(gdb) maintenance packet Qqemu.sstep=0x5 ++sending: "qemu.sstep=0x5" ++received: "OK" ++@end example ++@end table ++ ++@node pcsys_os_specific ++@section Target OS specific information ++ ++@subsection Linux ++ ++To have access to SVGA graphic modes under X11, use the @code{vesa} or ++the @code{cirrus} X11 driver. For optimal performances, use 16 bit ++color depth in the guest and the host OS. 
++ ++When using a 2.6 guest Linux kernel, you should add the option ++@code{clock=pit} on the kernel command line because the 2.6 Linux ++kernels make very strict real time clock checks by default that QEMU ++cannot simulate exactly. ++ ++When using a 2.6 guest Linux kernel, verify that the 4G/4G patch is ++not activated because QEMU is slower with this patch. The QEMU ++Accelerator Module is also much slower in this case. Earlier Fedora ++Core 3 Linux kernel (< 2.6.9-1.724_FC3) were known to incorporate this ++patch by default. Newer kernels don't have it. ++ ++@subsection Windows ++ ++If you have a slow host, using Windows 95 is better as it gives the ++best speed. Windows 2000 is also a good choice. ++ ++@subsubsection SVGA graphic modes support ++ ++QEMU emulates a Cirrus Logic GD5446 Video ++card. All Windows versions starting from Windows 95 should recognize ++and use this graphic card. For optimal performances, use 16 bit color ++depth in the guest and the host OS. ++ ++If you are using Windows XP as guest OS and if you want to use high ++resolution modes which the Cirrus Logic BIOS does not support (i.e. >= ++1280x1024x16), then you should use the VESA VBE virtual graphic card ++(option @option{-std-vga}). ++ ++@subsubsection CPU usage reduction ++ ++Windows 9x does not correctly use the CPU HLT ++instruction. The result is that it takes host CPU cycles even when ++idle. You can install the utility from ++@url{https://web.archive.org/web/20060212132151/http://www.user.cityline.ru/~maxamn/amnhltm.zip} ++to solve this problem. Note that no such tool is needed for NT, 2000 or XP. ++ ++@subsubsection Windows 2000 disk full problem ++ ++Windows 2000 has a bug which gives a disk full problem during its ++installation. When installing it, use the @option{-win2k-hack} QEMU ++option to enable a specific workaround. After Windows 2000 is ++installed, you no longer need this option (this option slows down the ++IDE transfers). 
++ ++@subsubsection Windows 2000 shutdown ++ ++Windows 2000 cannot automatically shutdown in QEMU although Windows 98 ++can. It comes from the fact that Windows 2000 does not automatically ++use the APM driver provided by the BIOS. ++ ++In order to correct that, do the following (thanks to Struan ++Bartlett): go to the Control Panel => Add/Remove Hardware & Next => ++Add/Troubleshoot a device => Add a new device & Next => No, select the ++hardware from a list & Next => NT Apm/Legacy Support & Next => Next ++(again) a few times. Now the driver is installed and Windows 2000 now ++correctly instructs QEMU to shutdown at the appropriate moment. ++ ++@subsubsection Share a directory between Unix and Windows ++ ++See @ref{sec_invocation} about the help of the option ++@option{'-netdev user,smb=...'}. ++ ++@subsubsection Windows XP security problem ++ ++Some releases of Windows XP install correctly but give a security ++error when booting: ++@example ++A problem is preventing Windows from accurately checking the ++license for this computer. Error code: 0x800703e6. ++@end example ++ ++The workaround is to install a service pack for XP after a boot in safe ++mode. Then reboot, and the problem should go away. Since there is no ++network while in safe mode, its recommended to download the full ++installation of SP1 or SP2 and transfer that via an ISO or using the ++vvfat block device ("-hdb fat:directory_which_holds_the_SP"). ++ ++@subsection MS-DOS and FreeDOS ++ ++@subsubsection CPU usage reduction ++ ++DOS does not correctly use the CPU HLT instruction. The result is that ++it takes host CPU cycles even when idle. You can install the utility from ++@url{https://web.archive.org/web/20051222085335/http://www.vmware.com/software/dosidle210.zip} ++to solve this problem. ++ ++@node QEMU System emulator for non PC targets ++@chapter QEMU System emulator for non PC targets ++ ++QEMU is a generic emulator and it emulates many non PC ++machines. 
Most of the options are similar to the PC emulator. The ++differences are mentioned in the following sections. ++ ++@menu ++* PowerPC System emulator:: ++* Sparc32 System emulator:: ++* Sparc64 System emulator:: ++* MIPS System emulator:: ++* ARM System emulator:: ++* ColdFire System emulator:: ++* Cris System emulator:: ++* Microblaze System emulator:: ++* SH4 System emulator:: ++* Xtensa System emulator:: ++@end menu ++ ++@node PowerPC System emulator ++@section PowerPC System emulator ++@cindex system emulation (PowerPC) ++ ++Use the executable @file{qemu-system-ppc} to simulate a complete 40P (PREP) ++or PowerMac PowerPC system. ++ ++QEMU emulates the following PowerMac peripherals: ++ ++@itemize @minus ++@item ++UniNorth or Grackle PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++2 PMAC IDE interfaces with hard disk and CD-ROM support ++@item ++NE2000 PCI adapters ++@item ++Non Volatile RAM ++@item ++VIA-CUDA with ADB keyboard and mouse. ++@end itemize ++ ++QEMU emulates the following 40P (PREP) peripherals: ++ ++@itemize @minus ++@item ++PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++2 IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@item ++PCnet network adapters ++@item ++Serial port ++@item ++PREP Non Volatile RAM ++@item ++PC compatible keyboard and mouse. ++@end itemize ++ ++Since version 0.9.1, QEMU uses OpenBIOS @url{https://www.openbios.org/} ++for the g3beige and mac99 PowerMac and the 40p machines. OpenBIOS is a free ++(GPL v2) portable firmware implementation. The goal is to implement a 100% ++IEEE 1275-1994 (referred to as Open Firmware) compliant firmware. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the PowerPC emulation: ++ ++@table @option ++ ++@item -g @var{W}x@var{H}[x@var{DEPTH}] ++ ++Set the initial VGA graphic mode. The default is 800x600x32. 
++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example +qemu-kvm -prom-env 'auto-boot?=false' \ - -prom-env 'boot-device=hd:2,\yaboot' \ - -prom-env 'boot-args=conf=hd:2,\yaboot.conf' - @end example ++ -prom-env 'boot-device=hd:2,\yaboot' \ ++ -prom-env 'boot-args=conf=hd:2,\yaboot.conf' ++@end example ++ ++@end table ++ ++@c man end ++ ++ ++More information is available at ++@url{http://perso.magic.fr/l_indien/qemu-ppc/}. ++ ++@node Sparc32 System emulator ++@section Sparc32 System emulator ++@cindex system emulation (Sparc32) ++ ++Use the executable @file{qemu-system-sparc} to simulate the following ++Sun4m architecture machines: ++@itemize @minus ++@item ++SPARCstation 4 ++@item ++SPARCstation 5 ++@item ++SPARCstation 10 ++@item ++SPARCstation 20 ++@item ++SPARCserver 600MP ++@item ++SPARCstation LX ++@item ++SPARCstation Voyager ++@item ++SPARCclassic ++@item ++SPARCbook ++@end itemize ++ ++The emulation is somewhat complete. SMP up to 16 CPUs is supported, ++but Linux limits the number of usable CPUs to 4. ++ ++QEMU emulates the following sun4m peripherals: ++ ++@itemize @minus ++@item ++IOMMU ++@item ++TCX or cgthree Frame buffer ++@item ++Lance (Am7990) Ethernet ++@item ++Non Volatile RAM M48T02/M48T08 ++@item ++Slave I/O: timers, interrupt controllers, Zilog serial ports, keyboard ++and power/reset logic ++@item ++ESP SCSI controller with hard disk and CD-ROM support ++@item ++Floppy drive (not on SS-600MP) ++@item ++CS4231 sound device (only on SS-5, not working yet) ++@end itemize ++ ++The number of peripherals is fixed in the architecture. Maximum ++memory size depends on the machine type, for SS-5 it is 256MB and for ++others 2047MB. ++ ++Since version 0.8.2, QEMU uses OpenBIOS ++@url{https://www.openbios.org/}. OpenBIOS is a free (GPL v2) portable ++firmware implementation. The goal is to implement a 100% IEEE ++1275-1994 (referred to as Open Firmware) compliant firmware. 
++ ++A sample Linux 2.6 series kernel and ram disk image are available on ++the QEMU web site. There are still issues with NetBSD and OpenBSD, but ++most kernel versions work. Please note that currently older Solaris kernels ++don't work probably due to interface issues between OpenBIOS and ++Solaris. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Sparc32 emulation: ++ ++@table @option ++ ++@item -g @var{W}x@var{H}x[x@var{DEPTH}] ++ ++Set the initial graphics mode. For TCX, the default is 1024x768x8 with the ++option of 1024x768x24. For cgthree, the default is 1024x768x8 with the option ++of 1152x900x8 for people who wish to use OBP. ++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example ++qemu-system-sparc -prom-env 'auto-boot?=false' \ ++ -prom-env 'boot-device=sd(0,2,0):d' -prom-env 'boot-args=linux single' ++@end example ++ ++@item -M [SS-4|SS-5|SS-10|SS-20|SS-600MP|LX|Voyager|SPARCClassic] [|SPARCbook] ++ ++Set the emulated machine type. Default is SS-5. ++ ++@end table ++ ++@c man end ++ ++@node Sparc64 System emulator ++@section Sparc64 System emulator ++@cindex system emulation (Sparc64) ++ ++Use the executable @file{qemu-system-sparc64} to simulate a Sun4u ++(UltraSPARC PC-like machine), Sun4v (T1 PC-like machine), or generic ++Niagara (T1) machine. The Sun4u emulator is mostly complete, being ++able to run Linux, NetBSD and OpenBSD in headless (-nographic) mode. The ++Sun4v emulator is still a work in progress. ++ ++The Niagara T1 emulator makes use of firmware and OS binaries supplied in the S10image/ directory ++of the OpenSPARC T1 project @url{http://download.oracle.com/technetwork/systems/opensparc/OpenSPARCT1_Arch.1.5.tar.bz2} ++and is able to boot the disk.s10hw2 Solaris image. 
++@example ++qemu-system-sparc64 -M niagara -L /path-to/S10image/ \ ++ -nographic -m 256 \ ++ -drive if=pflash,readonly=on,file=/S10image/disk.s10hw2 ++@end example ++ ++ ++QEMU emulates the following peripherals: ++ ++@itemize @minus ++@item ++UltraSparc IIi APB PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++PS/2 mouse and keyboard ++@item ++Non Volatile RAM M48T59 ++@item ++PC-compatible serial ports ++@item ++2 PCI IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Sparc64 emulation: ++ ++@table @option ++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example ++qemu-system-sparc64 -prom-env 'auto-boot?=false' ++@end example ++ ++@item -M [sun4u|sun4v|niagara] ++ ++Set the emulated machine type. The default is sun4u. ++ ++@end table ++ ++@c man end ++ ++@node MIPS System emulator ++@section MIPS System emulator ++@cindex system emulation (MIPS) ++ ++@menu ++* nanoMIPS System emulator :: ++@end menu ++ ++Four executables cover simulation of 32 and 64-bit MIPS systems in ++both endian options, @file{qemu-system-mips}, @file{qemu-system-mipsel} ++@file{qemu-system-mips64} and @file{qemu-system-mips64el}. ++Five different machine types are emulated: ++ ++@itemize @minus ++@item ++A generic ISA PC-like machine "mips" ++@item ++The MIPS Malta prototype board "malta" ++@item ++An ACER Pica "pica61". This machine needs the 64-bit emulator. ++@item ++MIPS emulator pseudo board "mipssim" ++@item ++A MIPS Magnum R4000 machine "magnum". This machine needs the 64-bit emulator. ++@end itemize ++ ++The generic emulation is supported by Debian 'Etch' and is able to ++install Debian into a virtual disk image. 
The following devices are ++emulated: ++ ++@itemize @minus ++@item ++A range of MIPS CPUs, default is the 24Kf ++@item ++PC style serial port ++@item ++PC style IDE disk ++@item ++NE2000 network card ++@end itemize ++ ++The Malta emulation supports the following devices: ++ ++@itemize @minus ++@item ++Core board with MIPS 24Kf CPU and Galileo system controller ++@item ++PIIX4 PCI/USB/SMbus controller ++@item ++The Multi-I/O chip's serial device ++@item ++PCI network cards (PCnet32 and others) ++@item ++Malta FPGA serial device ++@item ++Cirrus (default) or any other PCI VGA graphics card ++@end itemize ++ ++The Boston board emulation supports the following devices: ++ ++@itemize @minus ++@item ++Xilinx FPGA, which includes a PCIe root port and an UART ++@item ++Intel EG20T PCH connects the I/O peripherals, but only the SATA bus is emulated ++@end itemize ++ ++The ACER Pica emulation supports: ++ ++@itemize @minus ++@item ++MIPS R4000 CPU ++@item ++PC-style IRQ and DMA controllers ++@item ++PC Keyboard ++@item ++IDE controller ++@end itemize ++ ++The MIPS Magnum R4000 emulation supports: ++ ++@itemize @minus ++@item ++MIPS R4000 CPU ++@item ++PC-style IRQ controller ++@item ++PC Keyboard ++@item ++SCSI controller ++@item ++G364 framebuffer ++@end itemize ++ ++The Fulong 2E emulation supports: ++ ++@itemize @minus ++@item ++Loongson 2E CPU ++@item ++Bonito64 system controller as North Bridge ++@item ++VT82C686 chipset as South Bridge ++@item ++RTL8139D as a network card chipset ++@end itemize ++ ++The mipssim pseudo board emulation provides an environment similar ++to what the proprietary MIPS emulator uses for running Linux. 
++It supports: ++ ++@itemize @minus ++@item ++A range of MIPS CPUs, default is the 24Kf ++@item ++PC style serial port ++@item ++MIPSnet network emulation ++@end itemize ++ ++@node nanoMIPS System emulator ++@subsection nanoMIPS System emulator ++@cindex system emulation (nanoMIPS) ++ ++Executable @file{qemu-system-mipsel} also covers simulation of ++32-bit nanoMIPS system in little endian mode: ++ ++@itemize @minus ++@item ++nanoMIPS I7200 CPU ++@end itemize ++ ++Example of @file{qemu-system-mipsel} usage for nanoMIPS is shown below: ++ ++Download @code{} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/buildroot/index.html}. ++ ++Download @code{} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/kernels/v4.15.18-432-gb2eb9a8b07a1-20180627102142/index.html}. ++ ++Start system emulation of Malta board with nanoMIPS I7200 CPU: ++@example ++qemu-system-mipsel -cpu I7200 -kernel @code{} \ ++ -M malta -serial stdio -m @code{} -hda @code{} \ ++ -append "mem=256m@@0x0 rw console=ttyS0 vga=cirrus vesa=0x111 root=/dev/sda" ++@end example ++ ++ ++@node ARM System emulator ++@section ARM System emulator ++@cindex system emulation (ARM) ++ ++Use the executable @file{qemu-system-arm} to simulate a ARM ++machine. The ARM Integrator/CP board is emulated with the following ++devices: ++ ++@itemize @minus ++@item ++ARM926E, ARM1026E, ARM946E, ARM1136 or Cortex-A8 CPU ++@item ++Two PL011 UARTs ++@item ++SMC 91c111 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse. ++@item ++PL181 MultiMedia Card Interface with SD card. ++@end itemize ++ ++The ARM Versatile baseboard is emulated with the following devices: ++ ++@itemize @minus ++@item ++ARM926E, ARM1136 or Cortex-A8 CPU ++@item ++PL190 Vectored Interrupt Controller ++@item ++Four PL011 UARTs ++@item ++SMC 91c111 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse. ++@item ++PCI host bridge. 
Note the emulated PCI bridge only provides access to ++PCI memory space. It does not provide access to PCI IO space. ++This means some devices (eg. ne2k_pci NIC) are not usable, and others ++(eg. rtl8139 NIC) are only usable when the guest drivers use the memory ++mapped control registers. ++@item ++PCI OHCI USB controller. ++@item ++LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices. ++@item ++PL181 MultiMedia Card Interface with SD card. ++@end itemize ++ ++Several variants of the ARM RealView baseboard are emulated, ++including the EB, PB-A8 and PBX-A9. Due to interactions with the ++bootloader, only certain Linux kernel configurations work out ++of the box on these boards. ++ ++Kernels for the PB-A8 board should have CONFIG_REALVIEW_HIGH_PHYS_OFFSET ++enabled in the kernel, and expect 512M RAM. Kernels for The PBX-A9 board ++should have CONFIG_SPARSEMEM enabled, CONFIG_REALVIEW_HIGH_PHYS_OFFSET ++disabled and expect 1024M RAM. ++ ++The following devices are emulated: ++ ++@itemize @minus ++@item ++ARM926E, ARM1136, ARM11MPCore, Cortex-A8 or Cortex-A9 MPCore CPU ++@item ++ARM AMBA Generic/Distributed Interrupt Controller ++@item ++Four PL011 UARTs ++@item ++SMC 91c111 or SMSC LAN9118 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse ++@item ++PCI host bridge ++@item ++PCI OHCI USB controller ++@item ++LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices ++@item ++PL181 MultiMedia Card Interface with SD card. 
++@end itemize ++ ++The XScale-based clamshell PDA models ("Spitz", "Akita", "Borzoi" ++and "Terrier") emulation includes the following peripherals: ++ ++@itemize @minus ++@item ++Intel PXA270 System-on-chip (ARM V5TE core) ++@item ++NAND Flash memory ++@item ++IBM/Hitachi DSCM microdrive in a PXA PCMCIA slot - not in "Akita" ++@item ++On-chip OHCI USB controller ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++TI ADS7846 touchscreen controller on SSP bus ++@item ++Maxim MAX1111 analog-digital converter on I@math{^2}C bus ++@item ++GPIO-connected keyboard controller and LEDs ++@item ++Secure Digital card connected to PXA MMC/SD host ++@item ++Three on-chip UARTs ++@item ++WM8750 audio CODEC on I@math{^2}C and I@math{^2}S busses ++@end itemize ++ ++The Palm Tungsten|E PDA (codename "Cheetah") emulation includes the ++following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP310 System-on-chip (ARM 925T core) ++@item ++ROM and RAM memories (ROM firmware image can be loaded with -option-rom) ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++TI TSC2102i touchscreen controller / analog-digital converter / Audio ++CODEC, connected through MicroWire and I@math{^2}S busses ++@item ++GPIO-connected matrix keypad ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three on-chip UARTs ++@end itemize ++ ++Nokia N800 and N810 internet tablets (known also as RX-34 and RX-44 / 48) ++emulation supports the following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP2420 System-on-chip (ARM 1136 core) ++@item ++RAM and non-volatile OneNAND Flash memories ++@item ++Display connected to EPSON remote framebuffer chip and OMAP on-chip ++display controller and a LS041y3 MIPI DBI-C controller ++@item ++TI TSC2301 (in N800) and TI TSC2005 (in N810) touchscreen controllers ++driven through SPI bus ++@item ++National Semiconductor LM8323-controlled qwerty keyboard driven ++through 
I@math{^2}C bus ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three OMAP on-chip UARTs and on-chip STI debugging console ++@item ++Mentor Graphics "Inventra" dual-role USB controller embedded in a TI ++TUSB6010 chip - only USB host mode is supported ++@item ++TI TMP105 temperature sensor driven through I@math{^2}C bus ++@item ++TI TWL92230C power management companion with an RTC on I@math{^2}C bus ++@item ++Nokia RETU and TAHVO multi-purpose chips with an RTC, connected ++through CBUS ++@end itemize ++ ++The Luminary Micro Stellaris LM3S811EVB emulation includes the following ++devices: ++ ++@itemize @minus ++@item ++Cortex-M3 CPU core. ++@item ++64k Flash and 8k SRAM. ++@item ++Timers, UARTs, ADC and I@math{^2}C interface. ++@item ++OSRAM Pictiva 96x16 OLED with SSD0303 controller on I@math{^2}C bus. ++@end itemize ++ ++The Luminary Micro Stellaris LM3S6965EVB emulation includes the following ++devices: ++ ++@itemize @minus ++@item ++Cortex-M3 CPU core. ++@item ++256k Flash and 64k SRAM. ++@item ++Timers, UARTs, ADC, I@math{^2}C and SSI interfaces. ++@item ++OSRAM Pictiva 128x64 OLED with SSD0323 controller connected via SSI. ++@end itemize ++ ++The Freecom MusicPal internet radio emulation includes the following ++elements: ++ ++@itemize @minus ++@item ++Marvell MV88W8618 ARM core. ++@item ++32 MB RAM, 256 KB SRAM, 8 MB flash. ++@item ++Up to 2 16550 UARTs ++@item ++MV88W8xx8 Ethernet controller ++@item ++MV88W8618 audio controller, WM8750 CODEC and mixer ++@item ++128×64 display with brightness control ++@item ++2 buttons, 2 navigation wheels with button function ++@end itemize ++ ++The Siemens SX1 models v1 and v2 (default) basic emulation. 
++The emulation includes the following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP310 System-on-chip (ARM 925T core) ++@item ++ROM and RAM memories (ROM firmware image can be loaded with -pflash) ++V1 ++1 Flash of 16MB and 1 Flash of 8MB ++V2 ++1 Flash of 32MB ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three on-chip UARTs ++@end itemize ++ ++A Linux 2.6 test image is available on the QEMU web site. More ++information is available in the QEMU mailing-list archive. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the ARM emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++On ARM this implements the "Angel" interface. ++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. ++ ++@end table ++ ++@c man end ++ ++@node ColdFire System emulator ++@section ColdFire System emulator ++@cindex system emulation (ColdFire) ++@cindex system emulation (M68K) ++ ++Use the executable @file{qemu-system-m68k} to simulate a ColdFire machine. ++The emulator is able to boot a uClinux kernel. ++ ++The M5208EVB emulation includes the following devices: ++ ++@itemize @minus ++@item ++MCF5208 ColdFire V2 Microprocessor (ISA A+ with EMAC). ++@item ++Three Two on-chip UARTs. ++@item ++Fast Ethernet Controller (FEC) ++@end itemize ++ ++The AN5206 emulation includes the following devices: ++ ++@itemize @minus ++@item ++MCF5206 ColdFire V2 Microprocessor. ++@item ++Two on-chip UARTs. ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the ColdFire emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++On M68K this implements the "ColdFire GDB" interface used by libgloss. 
++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. ++ ++@end table ++ ++@c man end ++ ++@node Cris System emulator ++@section Cris System emulator ++@cindex system emulation (Cris) ++ ++TODO ++ ++@node Microblaze System emulator ++@section Microblaze System emulator ++@cindex system emulation (Microblaze) ++ ++TODO ++ ++@node SH4 System emulator ++@section SH4 System emulator ++@cindex system emulation (SH4) ++ ++TODO ++ ++@node Xtensa System emulator ++@section Xtensa System emulator ++@cindex system emulation (Xtensa) ++ ++Two executables cover simulation of both Xtensa endian options, ++@file{qemu-system-xtensa} and @file{qemu-system-xtensaeb}. ++Two different machine types are emulated: ++ ++@itemize @minus ++@item ++Xtensa emulator pseudo board "sim" ++@item ++Avnet LX60/LX110/LX200 board ++@end itemize ++ ++The sim pseudo board emulation provides an environment similar ++to one provided by the proprietary Tensilica ISS. ++It supports: ++ ++@itemize @minus ++@item ++A range of Xtensa CPUs, default is the DC232B ++@item ++Console and filesystem access via semihosting calls ++@end itemize ++ ++The Avnet LX60/LX110/LX200 emulation supports: ++ ++@itemize @minus ++@item ++A range of Xtensa CPUs, default is the DC232B ++@item ++16550 UART ++@item ++OpenCores 10/100 Mbps Ethernet MAC ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Xtensa emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++Xtensa semihosting provides basic file IO calls, such as open/read/write/seek/select. ++Tensilica baremetal libc for ISS and linux platform "sim" use this interface. ++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. 
++ ++@end table ++ ++@c man end ++ ++@node QEMU User space emulator ++@chapter QEMU User space emulator ++ ++@menu ++* Supported Operating Systems :: ++* Features:: ++* Linux User space emulator:: ++* BSD User space emulator :: ++@end menu ++ ++@node Supported Operating Systems ++@section Supported Operating Systems ++ ++The following OS are supported in user space emulation: ++ ++@itemize @minus ++@item ++Linux (referred to as qemu-linux-user) ++@item ++BSD (referred to as qemu-bsd-user) ++@end itemize ++ ++@node Features ++@section Features ++ ++QEMU user space emulation has the following notable features: ++ ++@table @strong ++@item System call translation: ++QEMU includes a generic system call translator. This means that ++the parameters of the system calls can be converted to fix ++endianness and 32/64-bit mismatches between hosts and targets. ++IOCTLs can be converted too. ++ ++@item POSIX signal handling: ++QEMU can redirect to the running program all signals coming from ++the host (such as @code{SIGALRM}), as well as synthesize signals from ++virtual CPU exceptions (for example @code{SIGFPE} when the program ++executes a division by zero). ++ ++QEMU relies on the host kernel to emulate most signal system ++calls, for example to emulate the signal mask. On Linux, QEMU ++supports both normal and real-time signals. ++ ++@item Threading: ++On Linux, QEMU can emulate the @code{clone} syscall and create a real ++host thread (with a separate virtual CPU) for each emulated thread. ++Note that not all targets currently emulate atomic operations correctly. ++x86 and ARM use a global lock in order to preserve their semantics. ++@end table ++ ++QEMU was conceived so that ultimately it can emulate itself. Although ++it is not very useful, it is an important test to show the power of the ++emulator.
++ ++@node Linux User space emulator ++@section Linux User space emulator ++ ++@menu ++* Quick Start:: ++* Wine launch:: ++* Command line options:: ++* Other binaries:: ++@end menu ++ ++@node Quick Start ++@subsection Quick Start ++ ++In order to launch a Linux process, QEMU needs the process executable ++itself and all the target (x86) dynamic libraries used by it. ++ ++@itemize ++ ++@item On x86, you can just try to launch any process by using the native ++libraries: ++ ++@example ++qemu-i386 -L / /bin/ls ++@end example ++ ++@code{-L /} tells that the x86 dynamic linker must be searched with a ++@file{/} prefix. ++ ++@item Since QEMU is also a linux process, you can launch QEMU with ++QEMU (NOTE: you can only do that if you compiled QEMU from the sources): ++ ++@example ++qemu-i386 -L / qemu-i386 -L / /bin/ls ++@end example ++ ++@item On non x86 CPUs, you need first to download at least an x86 glibc ++(@file{qemu-runtime-i386-XXX-.tar.gz} on the QEMU web page). Ensure that ++@code{LD_LIBRARY_PATH} is not set: ++ ++@example ++unset LD_LIBRARY_PATH ++@end example ++ ++Then you can launch the precompiled @file{ls} x86 executable: ++ ++@example ++qemu-i386 tests/i386/ls ++@end example ++You can look at @file{scripts/qemu-binfmt-conf.sh} so that ++QEMU is automatically launched by the Linux kernel when you try to ++launch x86 executables. It requires the @code{binfmt_misc} module in the ++Linux kernel. ++ ++@item The x86 version of QEMU is also included. You can try weird things such as: ++@example ++qemu-i386 /usr/local/qemu-i386/bin/qemu-i386 \ ++ /usr/local/qemu-i386/bin/ls-i386 ++@end example ++ ++@end itemize ++ ++@node Wine launch ++@subsection Wine launch ++ ++@itemize ++ ++@item Ensure that you have a working QEMU with the x86 glibc ++distribution (see previous section). 
In order to verify it, you must be ++able to do: ++ ++@example ++qemu-i386 /usr/local/qemu-i386/bin/ls-i386 ++@end example ++ ++@item Download the binary x86 Wine install ++(@file{qemu-XXX-i386-wine.tar.gz} on the QEMU web page). ++ ++@item Configure Wine on your account. Look at the provided script ++@file{/usr/local/qemu-i386/@/bin/wine-conf.sh}. Your previous ++@code{$@{HOME@}/.wine} directory is saved to @code{$@{HOME@}/.wine.org}. ++ ++@item Then you can try the example @file{putty.exe}: ++ ++@example ++qemu-i386 /usr/local/qemu-i386/wine/bin/wine \ ++ /usr/local/qemu-i386/wine/c/Program\ Files/putty.exe ++@end example ++ ++@end itemize ++ ++@node Command line options ++@subsection Command line options ++ ++@example ++@command{qemu-i386} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...] ++@end example ++ ++@table @option ++@item -h ++Print the help ++@item -L path ++Set the x86 elf interpreter prefix (default=/usr/local/qemu-i386) ++@item -s size ++Set the x86 stack size in bytes (default=524288) ++@item -cpu model ++Select CPU model (-cpu help for list and additional feature selection) ++@item -E @var{var}=@var{value} ++Set environment @var{var} to @var{value}. ++@item -U @var{var} ++Remove @var{var} from the environment. ++@item -B offset ++Offset guest address by the specified number of bytes. This is useful when ++the address region required by guest applications is reserved on the host. ++This option is currently only supported on some hosts. ++@item -R size ++Pre-allocate a guest virtual address space of the given size (in bytes). ++"G", "M", and "k" suffixes may be used when specifying the size. ++@end table ++ ++Debug options: ++ ++@table @option ++@item -d item1,...
++Activate logging of the specified items (use '-d help' for a list of log items) ++@item -p pagesize ++Act as if the host page size was 'pagesize' bytes ++@item -g port ++Wait gdb connection to port ++@item -singlestep ++Run the emulation in single step mode. ++@end table ++ ++Environment variables: ++ ++@table @env ++@item QEMU_STRACE ++Print system calls and arguments similar to the 'strace' program ++(NOTE: the actual 'strace' program will not work because the user ++space emulator hasn't implemented ptrace). At the moment this is ++incomplete. All system calls that don't have a specific argument ++format are printed with information for six arguments. Many ++flag-style arguments don't have decoders and will show up as numbers. ++@end table ++ ++@node Other binaries ++@subsection Other binaries ++ ++@cindex user mode (Alpha) ++@command{qemu-alpha} TODO. ++ ++@cindex user mode (ARM) ++@command{qemu-armeb} TODO. ++ ++@cindex user mode (ARM) ++@command{qemu-arm} is also capable of running ARM "Angel" semihosted ELF ++binaries (as implemented by the arm-elf and arm-eabi Newlib/GDB ++configurations), and arm-uclinux bFLT format binaries. ++ ++@cindex user mode (ColdFire) ++@cindex user mode (M68K) ++@command{qemu-m68k} is capable of running semihosted binaries using the BDM ++(m5xxx-ram-hosted.ld) or m68k-sim (sim.ld) syscall interfaces, and ++coldfire uClinux bFLT format binaries. ++ ++The binary format is detected automatically. ++ ++@cindex user mode (Cris) ++@command{qemu-cris} TODO. ++ ++@cindex user mode (i386) ++@command{qemu-i386} TODO. ++@command{qemu-x86_64} TODO. ++ ++@cindex user mode (Microblaze) ++@command{qemu-microblaze} TODO. ++ ++@cindex user mode (MIPS) ++@command{qemu-mips} executes 32-bit big endian MIPS binaries (MIPS O32 ABI). ++ ++@command{qemu-mipsel} executes 32-bit little endian MIPS binaries (MIPS O32 ABI). ++ ++@command{qemu-mips64} executes 64-bit big endian MIPS binaries (MIPS N64 ABI). 
++ ++@command{qemu-mips64el} executes 64-bit little endian MIPS binaries (MIPS N64 ABI). ++ ++@command{qemu-mipsn32} executes 32-bit big endian MIPS binaries (MIPS N32 ABI). ++ ++@command{qemu-mipsn32el} executes 32-bit little endian MIPS binaries (MIPS N32 ABI). ++ ++@cindex user mode (NiosII) ++@command{qemu-nios2} TODO. ++ ++@cindex user mode (PowerPC) ++@command{qemu-ppc64abi32} TODO. ++@command{qemu-ppc64} TODO. ++@command{qemu-ppc} TODO. ++ ++@cindex user mode (SH4) ++@command{qemu-sh4eb} TODO. ++@command{qemu-sh4} TODO. ++ ++@cindex user mode (SPARC) ++@command{qemu-sparc} can execute Sparc32 binaries (Sparc32 CPU, 32 bit ABI). ++ ++@command{qemu-sparc32plus} can execute Sparc32 and SPARC32PLUS binaries ++(Sparc64 CPU, 32 bit ABI). ++ ++@command{qemu-sparc64} can execute some Sparc64 (Sparc64 CPU, 64 bit ABI) and ++SPARC32PLUS binaries (Sparc64 CPU, 32 bit ABI). ++ ++@node BSD User space emulator ++@section BSD User space emulator ++ ++@menu ++* BSD Status:: ++* BSD Quick Start:: ++* BSD Command line options:: ++@end menu ++ ++@node BSD Status ++@subsection BSD Status ++ ++@itemize @minus ++@item ++target Sparc64 on Sparc64: Some trivial programs work. ++@end itemize ++ ++@node BSD Quick Start ++@subsection Quick Start ++ ++In order to launch a BSD process, QEMU needs the process executable ++itself and all the target dynamic libraries used by it. ++ ++@itemize ++ ++@item On Sparc64, you can just try to launch any process by using the native ++libraries: ++ ++@example ++qemu-sparc64 /bin/ls ++@end example ++ ++@end itemize ++ ++@node BSD Command line options ++@subsection Command line options ++ ++@example ++@command{qemu-sparc64} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...]
++@end example ++ ++@table @option ++@item -h ++Print the help ++@item -L path ++Set the library root path (default=/) ++@item -s size ++Set the stack size in bytes (default=524288) ++@item -ignore-environment ++Start with an empty environment. Without this option, ++the initial environment is a copy of the caller's environment. ++@item -E @var{var}=@var{value} ++Set environment @var{var} to @var{value}. ++@item -U @var{var} ++Remove @var{var} from the environment. ++@item -bsd type ++Set the type of the emulated BSD Operating system. Valid values are ++FreeBSD, NetBSD and OpenBSD (default). ++@end table ++ ++Debug options: ++ ++@table @option ++@item -d item1,... ++Activate logging of the specified items (use '-d help' for a list of log items) ++@item -p pagesize ++Act as if the host page size was 'pagesize' bytes ++@item -singlestep ++Run the emulation in single step mode. ++@end table ++ ++@node System requirements ++@chapter System requirements ++ ++@section KVM kernel module ++ ++On x86_64 hosts, the default set of CPU features enabled by the KVM accelerator ++requires the host to be running Linux v4.5 or newer. ++ ++The OpteronG[345] CPU models require KVM support for RDTSCP, which was ++added in Linux 4.5, a version supported by the major distros. And even ++if RHEL7 has kernel 3.10, KVM there has the required functionality ++to make it close to a 4.5 or newer kernel. ++ ++@include docs/security.texi ++ ++@include qemu-tech.texi ++ ++@include qemu-deprecated.texi ++ ++@node Supported build platforms ++@appendix Supported build platforms ++ ++QEMU aims to support building and executing on multiple host OS platforms. ++This appendix outlines which platforms are the major build targets. These ++platforms are used as the basis for deciding upon the minimum required ++versions of 3rd party software QEMU depends on.
The supported platforms ++are the targets for automated testing performed by the project when patches ++are submitted for review, and tested before and after merge. ++ ++If a platform is not listed here, it does not imply that QEMU won't work. ++If an unlisted platform has comparable software versions to a listed platform, ++there is every expectation that it will work. Bug reports are welcome for ++problems encountered on unlisted platforms unless they are clearly older ++vintage than what is described here. ++ ++Note that when considering software versions shipped in distros as support ++targets, QEMU considers only the version number, and assumes the features in ++that distro match the upstream release with the same version. In other words, ++if a distro backports extra features to the software in their distro, QEMU ++upstream code will not add explicit support for those backports, unless the ++feature is auto-detectable in a manner that works for the upstream releases ++too. ++ ++The Repology site @url{https://repology.org} is a useful resource to identify ++currently shipped versions of software in various operating systems, though ++it does not cover all distros listed below. ++ ++@section Linux OS ++ ++For distributions with frequent, short-lifetime releases, the project will ++aim to support all versions that are not end of life by their respective ++vendors. For the purposes of identifying supported software versions, the ++project will look at Fedora, Ubuntu, and openSUSE distros. Other short- ++lifetime distros will be assumed to ship similar software versions. ++ ++For distributions with long-lifetime releases, the project will aim to support ++the most recent major version at all times. Support for the previous major ++version will be dropped 2 years after the new major version is released, ++or when it reaches ``end of life''. 
For the purposes of identifying ++supported software versions, the project will look at RHEL, Debian, ++Ubuntu LTS, and SLES distros. Other long-lifetime distros will be ++assumed to ship similar software versions. ++ ++@section Windows ++ ++The project supports building with current versions of the MinGW toolchain, ++hosted on Linux. ++ ++@section macOS ++ ++The project supports building with the two most recent versions of macOS, with ++the current homebrew package set available. ++ ++@section FreeBSD ++ ++The project aims to support all the versions which are not end of life. ++ ++@section NetBSD ++ ++The project aims to support the most recent major version at all times. Support ++for the previous major version will be dropped 2 years after the new major ++version is released. ++ ++@section OpenBSD ++ ++The project aims to support all the versions which are not end of life. ++ ++@node License ++@appendix License ++ ++QEMU is a trademark of Fabrice Bellard. ++ ++QEMU is released under the ++@url{https://www.gnu.org/licenses/gpl-2.0.txt,GNU General Public License}, ++version 2. Parts of QEMU have specific licenses, see file ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=LICENSE,LICENSE}. ++ ++@node Index ++@appendix Index ++@menu ++* Concept Index:: ++* Function Index:: ++* Keystroke Index:: ++* Program Index:: ++* Data Type Index:: ++* Variable Index:: ++@end menu ++ ++@node Concept Index ++@section Concept Index ++This is the main index. Should we combine all keywords in one index? TODO ++@printindex cp ++ ++@node Function Index ++@section Function Index ++This index could be used for command line options and monitor functions. ++@printindex fn ++ ++@node Keystroke Index ++@section Keystroke Index ++ ++This is a list of all keystrokes which have a special function ++in system emulation.
++ ++@printindex ky ++ ++@node Program Index ++@section Program Index ++@printindex pg ++ ++@node Data Type Index ++@section Data Type Index ++ ++This index could be used for qdev device names and options. ++ ++@printindex tp ++ ++@node Variable Index ++@section Variable Index ++@printindex vr ++ ++@bye diff --git a/qemu-options.hx b/qemu-options.hx -index fc17aca631..df1d27b6f2 100644 +index 1df25ae..8c48b40 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. +@@ -2878,11 +2878,11 @@ SRST - Example: - @example --qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - @end example + :: - @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -3631,14 +3631,14 @@ ETEXI - - DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, - "-realtime [mlock=on|off]\n" -- " run qemu with realtime features\n" -+ " run qemu-kvm with realtime features\n" - " mlock=on|off controls mlock support (default: on)\n", - QEMU_ARCH_ALL) - STEXI - @item -realtime mlock=on|off - @findex -realtime --Run qemu with realtime features. --mlocking qemu and guest memory can be enabled via @option{mlock=on} -+Run qemu-kvm with realtime features. -+mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} - (enabled by default). 
- ETEXI +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + ``-netdev hubport,id=id,hubid=hubid[,netdev=nd]`` + Create a hub port on the emulated hub with ID hubid. -- -2.21.0 +1.8.3.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index bc6146d..8137171 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 +From 50d4f1973a86696cb7487173cbdbc68453445c54 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -56,16 +56,15 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) -Signed-off-by: Danilo C. L. 
de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8fed2eedd6..d2b9744030 100644 +index b25cce8..9582d81 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3413,6 +3413,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) xhci->max_pstreams_mask = 0; } @@ -78,7 +77,7 @@ index 8fed2eedd6..d2b9744030 100644 if (xhci->msi != ON_OFF_AUTO_OFF) { ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3461,12 +3467,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); @@ -92,5 +91,5 @@ index 8fed2eedd6..d2b9744030 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -2.21.0 +1.8.3.1 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index e167b2e..c087f5e 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 +From 5d9529f40e7cc092a57f9203aad22f3644a2b6d6 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,10 +45,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index e8b2b64d09..54108c0056 100644 +index 472bbd2..ba2dac8 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); int ret; @@ -65,5 +65,5 @@ index e8b2b64d09..54108c0056 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.21.0 +1.8.3.1 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index b3350da..41c655c 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 +From 3ea4a35afce28805241b3be3c11de605600ecda1 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 481dfd2a27..805f38533e 100644 +index eb54f94..ecefb08 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 481dfd2a27..805f38533e 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.21.0 +1.8.3.1 diff --git a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch deleted file mode 100644 index a2a800b..0000000 --- a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ /dev/null @@ -1,61 +0,0 @@ -From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 16 Sep 2019 17:07:00 +0100 -Subject: Using ip_deq after m_free might read pointers from an allocation - reuse. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190916170700.647-2-philmd@redhat.com> -Patchwork-id: 90470 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. -Bugzilla: 1749737 -RH-Acked-by: Danilo de Paula -RH-Acked-by: John Snow - -From: Samuel Thibault - -This would be difficult to exploit, but that is still related with -CVE-2019-14378 which generates fragmented IP packets that would trigger this -issue and at least produce a DoS. - -Signed-off-by: Samuel Thibault -(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) -Signed-off-by: Philippe Mathieu-Daudé - -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/ip_input.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91495..df1c846ade 100644 ---- a/slirp/src/ip_input.c -+++ b/slirp/src/ip_input.c -@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - */ - while (q != (struct ipasfrag *)&fp->frag_link && - ip->ip_off + ip->ip_len > q->ipf_off) { -+ struct ipasfrag *prev; - i = (ip->ip_off + ip->ip_len) - q->ipf_off; - if (i < q->ipf_len) { - q->ipf_len -= i; -@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - m_adj(dtom(slirp, q), i); - break; - } -+ prev = q; - q = q->ipf_next; -- m_free(dtom(slirp, q->ipf_prev)); -- ip_deq(q->ipf_prev); -+ ip_deq(prev); -+ m_free(dtom(slirp, prev)); -+ - } - - insert: --- -2.21.0 - diff --git a/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch similarity index 90% rename from kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch rename to 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index ea796d5..be042ba 100644 --- a/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,8 +1,7 @@ -From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 +From f07c3ee209b3897efebb4cf008c88a390205a5dd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 -Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature - flag +Subject: block: Versioned x-blockdev-reopen API with feature flag RH-Author: Kevin Wolf Message-id: <20200313123439.10548-7-kwolf@redhat.com> @@ -30,10 +29,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 0cf68fe..a1e85b0 100644 +index 943df19..50b99fb 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4202,10 +4202,17 @@ +@@ -4126,10 +4126,17 @@ # image does not have a default backing file name as part of its # metadata. # diff --git a/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch deleted file mode 100644 index 1435017..0000000 --- a/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 17 Jan 2020 13:18:27 +0100 -Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR - support - -RH-Author: Paolo Bonzini -Message-id: <20200117131827.20361-1-pbonzini@redhat.com> -Patchwork-id: 93405 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support -Bugzilla: 1559846 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Miroslav Rezanina - -BZ: 1559846 -BRANCH: rhel-av-8.2.0 -BREW: 25775160 -UPSTREAM: RHEL only - -Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. Drop the -features via compat properties, they will be moved to 8.2 machine type compat -properties in the 8.3 timeframe. - -Signed-off-by: Paolo Bonzini ---- - No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the - branch. 
:( - - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 61e70e4..73a0f11 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); - GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "host-phys-bits", "on" }, - { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, -+ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, -+ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, - }; --- -1.8.3.1 - diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch deleted file mode 100644 index d717ae2..0000000 --- a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +++ /dev/null @@ -1,115 +0,0 @@ -From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 2 Jan 2020 11:56:51 +0000 -Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200102115651.140177-1-dgilbert@redhat.com> -Patchwork-id: 93256 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot -Bugzilla: 1772774 -RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laszlo Ersek - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 -branch: rhel-av-8.2.0 - -kvm_set_phys_mem can be called to reallocate a slot by something the -guest does (e.g. writing to PAM and other chipset registers). -This can happen in the middle of a migration, and if we're unlucky -it can now happen between the split 'sync' and 'clear'; the clear -asserts if there's no bmap to clear. 
Recreate the bmap whenever -we change the slot, keeping the clear path happy. - -Typically this is triggered by the guest rebooting during a migrate. - -Corresponds to: -https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -https://bugzilla.redhat.com/show_bug.cgi?id=1771032 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) -Signed-off-by: Danilo C. L. de Paula ---- - accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- - 1 file changed, 29 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index dc3ed7f..5007bda 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - - #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) - -+/* Allocate the dirty bitmap for a slot */ -+static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) -+{ -+ /* -+ * XXX bad kernel interface alert -+ * For dirty bitmap, kernel allocates array of size aligned to -+ * bits-per-long. But for case when the kernel is 64bits and -+ * the userspace is 32bits, userspace can't align to the same -+ * bits-per-long, since sizeof(long) is different between kernel -+ * and user space. This way, userspace will provide buffer which -+ * may be 4 bytes less than the kernel will use, resulting in -+ * userspace memory corruption (which is not detectable by valgrind -+ * too, in most cases). -+ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -+ * a hope that sizeof(long) won't become >8 any time soon. 
-+ */ -+ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ /*HOST_LONG_BITS*/ 64) / 8; -+ mem->dirty_bmap = g_malloc0(bitmap_size); -+} -+ - /** - * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space - * -@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - goto out; - } - -- /* XXX bad kernel interface alert -- * For dirty bitmap, kernel allocates array of size aligned to -- * bits-per-long. But for case when the kernel is 64bits and -- * the userspace is 32bits, userspace can't align to the same -- * bits-per-long, since sizeof(long) is different between kernel -- * and user space. This way, userspace will provide buffer which -- * may be 4 bytes less than the kernel will use, resulting in -- * userspace memory corruption (which is not detectable by valgrind -- * too, in most cases). -- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -- * a hope that sizeof(long) won't become >8 any time soon. -- */ - if (!mem->dirty_bmap) { -- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -- /*HOST_LONG_BITS*/ 64) / 8; - /* Allocate on the first log_sync, once and for all */ -- mem->dirty_bmap = g_malloc0(bitmap_size); -+ kvm_memslot_init_dirty_bitmap(mem); - } - - d.dirty_bitmap = mem->dirty_bmap; -@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - mem->ram = ram; - mem->flags = kvm_mem_flags(mr); - -+ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -+ /* -+ * Reallocate the bmap; it means it doesn't disappear in -+ * middle of a migrate. 
-+ */ -+ kvm_memslot_init_dirty_bitmap(mem); -+ } - err = kvm_set_user_memory_region(kml, mem, true); - if (err) { - fprintf(stderr, "%s: error registering slot: %s\n", __func__, --- -1.8.3.1 - diff --git a/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index 0c1c37f..0000000 --- a/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:23 +0100 -Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-2-kwolf@redhat.com> -Patchwork-id: 94464 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" -Bugzilla: 1794692 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. - -The fix was incomplete as it only protected against requests waiting for -themselves, but not against requests waiting for each other. We need a -different solution. - -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 21 +++++++++------------ - 1 file changed, 9 insertions(+), 12 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index cacbc70..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -- if (self == op) { -- continue; -- } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, self, false); -+ mirror_wait_for_any_operation(s, false); - } - - /* Perform a mirror copy operation. 
-@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, pseudo_op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - if (s->ret < 0) { -@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - } - } - -@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } - -@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, NULL, true); -+ mirror_wait_for_any_operation(s, true); - } - - if (s->ret < 0) { -@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch deleted file mode 100644 index dc65c26..0000000 --- a/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:02 +0100 -Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-88-dgilbert@redhat.com> -Patchwork-id: 93542 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -For fuse's queueinfo, both queueinfo array and queueinfos are allocated in -fv_queue_set_started() but not cleaned up when the daemon process quits. - -This fixes the leak in proper places. - -Signed-off-by: Liu Bo -Signed-off-by: Eric Ren -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index b7948de..fb8d6d1 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - } - close(ourqi->kill_fd); - ourqi->kick_fd = -1; -+ free(vud->qi[qidx]); -+ vud->qi[qidx] = NULL; - } - - /* Callback from libvhost-user on start or stop of a queue */ -@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) - void virtio_session_close(struct fuse_session *se) - { - close(se->vu_socketfd); -+ -+ if (!se->virtio_dev) { -+ return; -+ } -+ -+ free(se->virtio_dev->qi); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch deleted file mode 100644 index becba21..0000000 --- a/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:57 +0000 -Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-4-peterx@redhat.com> -Patchwork-id: 93628 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. 
David Alan Gilbert - -Migration is silently broken now with x2apic config like this: - - -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ - -device intel-iommu,intremap=on,eim=on - -After migration, the guest kernel could hang at anything, due to -x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so -any operations related to x2apic could be broken then (e.g., RDMSR on -x2apic MSRs could fail because KVM would think that the vcpu hasn't -enabled x2apic at all). - -The issue is that the x2apic bit was never applied correctly for vcpus -whose ID > 255 when migrate completes, and that's because when we -migrate APIC we use the APICCommonState.id as instance ID of the -migration stream, while that's too short for x2apic. - -Let's use the newly introduced initial_apic_id for that. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Eduardo Habkost -Signed-off-by: Juan Quintela -(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/apic_common.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 54b8731..b5dbeb6 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- uint32_t instance_id = s->id; -+ uint32_t instance_id = s->initial_apic_id; -+ -+ /* Normally initial APIC ID should be no more than hundreds */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); --- -1.8.3.1 - diff --git a/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch b/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch deleted file mode 100644 index 7fb76c1..0000000 --- a/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:14 +0100 -Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-4-kwolf@redhat.com> -Patchwork-id: 94596 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All code-paths leading to backup_clean (via job_clean) have the job's -context already acquired. The job's context is guaranteed to be the same -as the one used by backup_top via backup_job_create. - -Since the previous logic effectively acquired the lock twice, this -broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE -in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock -once, thus deadlocking with the IO thread. 
- -This is a partial revert of 0abf2581717a19. - -Signed-off-by: Stefan Reiter -Reviewed-by: Max Reitz -Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/backup.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 1383e21..ec50946 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,11 +135,7 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); -- -- aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -- aio_context_release(aio_context); - } - - void backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/kvm-backup-top-Begin-drain-earlier.patch b/kvm-backup-top-Begin-drain-earlier.patch deleted file mode 100644 index ef289b7..0000000 --- a/kvm-backup-top-Begin-drain-earlier.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:45 +0000 -Subject: [PATCH 12/18] backup-top: Begin drain earlier - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-6-slp@redhat.com> -Patchwork-id: 93757 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -When dropping backup-top, we need to drain the node before freeing the -BlockCopyState. Otherwise, requests may still be in flight and then the -assertion in shres_destroy() will fail. - -(This becomes visible in intermittent failure of 056.) 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191219182638.104621-1-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 7cdb1f8..818d3f2 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - BDRVBackupTopState *s = bs->opaque; - AioContext *aio_context = bdrv_get_aio_context(bs); - -- block_copy_state_free(s->bcs); -- - aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -+ block_copy_state_free(s->bcs); -+ - s->active = false; - bdrv_child_refresh_perms(bs, bs->backing, &error_abort); - bdrv_replace_node(bs, backing_bs(bs), &error_abort); --- -1.8.3.1 - diff --git a/kvm-block-Activate-recursively-even-for-already-active-n.patch b/kvm-block-Activate-recursively-even-for-already-active-n.patch deleted file mode 100644 index d6cad06..0000000 --- a/kvm-block-Activate-recursively-even-for-already-active-n.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:01 +0000 -Subject: [PATCH 04/18] block: Activate recursively even for already active - nodes - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-4-kwolf@redhat.com> -Patchwork-id: 93749 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_invalidate_cache_all() assumes that all nodes in a given subtree -are either active or inactive when it starts. Therefore, as soon as it -arrives at an already active node, it stops. 
- -However, this assumption is wrong. For example, it's possible to take a -snapshot of an inactive node, which results in an active overlay over an -inactive backing file. The active overlay is probably also the root node -of an inactive BlockBackend (blk->disable_perm == true). - -In this case, bdrv_invalidate_cache_all() does not need to do anything -to activate the overlay node, but it still needs to recurse into the -children and the parents to make sure that after returning success, -really everything is activated. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 50 ++++++++++++++++++++++++-------------------------- - 1 file changed, 24 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 473eb6e..2e5e8b6 100644 ---- a/block.c -+++ b/block.c -@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - return; - } - -- if (!(bs->open_flags & BDRV_O_INACTIVE)) { -- return; -- } -- - QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_invalidate_cache(child->bs, &local_err); - if (local_err) { -@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - * just keep the extended permissions for the next time that an activation - * of the image is tried. 
- */ -- bs->open_flags &= ~BDRV_O_INACTIVE; -- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_propagate(errp, local_err); -- return; -- } -- bdrv_set_perm(bs, perm, shared_perm); -- -- if (bs->drv->bdrv_co_invalidate_cache) { -- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -- if (local_err) { -+ if (bs->open_flags & BDRV_O_INACTIVE) { -+ bs->open_flags &= ~BDRV_O_INACTIVE; -+ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -+ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -+ if (ret < 0) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return; - } -- } -+ bdrv_set_perm(bs, perm, shared_perm); - -- FOR_EACH_DIRTY_BITMAP(bs, bm) { -- bdrv_dirty_bitmap_skip_store(bm, false); -- } -+ if (bs->drv->bdrv_co_invalidate_cache) { -+ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -+ if (local_err) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_propagate(errp, local_err); -+ return; -+ } -+ } - -- ret = refresh_total_sectors(bs, bs->total_sectors); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_setg_errno(errp, -ret, "Could not refresh total sector count"); -- return; -+ FOR_EACH_DIRTY_BITMAP(bs, bm) { -+ bdrv_dirty_bitmap_skip_store(bm, false); -+ } -+ -+ ret = refresh_total_sectors(bs, bs->total_sectors); -+ if (ret < 0) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_setg_errno(errp, -ret, "Could not refresh total sector count"); -+ return; -+ } - } - - QLIST_FOREACH(parent, &bs->parents, next_parent) { --- -1.8.3.1 - diff --git a/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch deleted file mode 100644 index b16c0b7..0000000 --- a/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 
f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:17 +0100 -Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-7-kwolf@redhat.com> -Patchwork-id: 94599 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Waiting in blk_wait_while_drained() while blk->in_flight is increased -for the current request is wrong because it will cause the drain -operation to deadlock. - -This patch makes sure that blk_wait_while_drained() is called with -blk->in_flight increased exactly once for the current request, and that -it temporarily decreases the counter while it waits. - -Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 610dbfa..38ae413 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - return 0; - } - -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ - static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - { -+ assert(blk->in_flight > 0); -+ - if (blk->quiesce_counter && !blk->disable_request_queuing) { -+ blk_dec_in_flight(blk); - qemu_co_queue_wait(&blk->queued_requests, NULL); -+ blk_inc_in_flight(blk); - } - } - -@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); -@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); --- -1.8.3.1 - diff --git a/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch deleted file mode 100644 index 0bad890..0000000 --- a/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:37 +0000 -Subject: [PATCH 
17/20] block: Fix cross-AioContext blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-12-kwolf@redhat.com> -Patchwork-id: 94286 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -external_snapshot_prepare() tries to move the overlay to the AioContext -of the backing file (the snapshotted node). However, it's possible that -this doesn't work, but the backing file can instead be moved to the -overlay's AioContext (e.g. opening the backing chain for a mirror -target). - -bdrv_append() already indirectly uses bdrv_attach_node(), which takes -care to move nodes to make sure they use the same AioContext and which -tries both directions. - -So the problem has a simple fix: Just delete the unnecessary extra -bdrv_try_set_aio_context() call in external_snapshot_prepare() and -instead assert in bdrv_append() that both nodes were indeed moved to the -same AioContext. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-6-kwolf@redhat.com> -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 1 + - blockdev.c | 16 ---------------- - 2 files changed, 1 insertion(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index 354d388..ec29b1e 100644 ---- a/block.c -+++ b/block.c -@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - bdrv_ref(from); - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); - - /* Put all parents into @list and calculate their cumulative permissions */ -diff --git a/blockdev.c b/blockdev.c -index 7918533..c8d4b51 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -- AioContext *old_context; - uint64_t perm, shared; -- int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar - * purpose but a different set of parameters */ -@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(state->new_bs); -- aio_context_release(aio_context); -- aio_context_acquire(old_context); -- -- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- -- if (ret < 0) { -- goto out; -- } -- - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ --- -1.8.3.1 - diff --git a/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch deleted file mode 100644 index 1735dc0..0000000 --- a/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:47 +0000 -Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> -Patchwork-id: 94229 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -@options is leaked by the first two return statements in this function. - -Note that blk_new_open() takes the reference to @options even on -failure, so all we need to do to fix the leak is to move the QDict -allocation down to where we actually need it. - -Reported-by: Coverity (CID 1419884) -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd - ("block: Generic file creation fallback") -Signed-off-by: Max Reitz -Message-Id: <20200225155618.133412-1-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 3beec7f..e1a4e38 100644 ---- a/block.c -+++ b/block.c -@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - QemuOpts *opts, Error **errp) - { - BlockBackend *blk; -- QDict *options = qdict_new(); -+ QDict *options; - int64_t size = 0; - char *buf = NULL; - PreallocMode prealloc; -@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - return -ENOTSUP; - } - -+ options = qdict_new(); - qdict_put_str(options, "driver", drv->format_name); - - blk = blk_new_open(filename, NULL, options, --- -1.8.3.1 - diff --git a/kvm-block-Generic-file-creation-fallback.patch b/kvm-block-Generic-file-creation-fallback.patch deleted file mode 100644 index a5dd1d7..0000000 --- a/kvm-block-Generic-file-creation-fallback.patch +++ /dev/null @@ -1,227 +0,0 @@ -From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:43 +0000 -Subject: [PATCH 02/20] block: Generic file creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> -Patchwork-id: 94227 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -If a protocol driver does not support image creation, we can see whether -maybe the file exists already. If so, just truncating it will be -sufficient. - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-3-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 147 insertions(+), 12 deletions(-) - -diff --git a/block.c b/block.c -index 2e5e8b6..3beec7f 100644 ---- a/block.c -+++ b/block.c -@@ -532,20 +532,139 @@ out: - return ret; - } - --int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+/** -+ * Helper function for bdrv_create_file_fallback(): Resize @blk to at -+ * least the given @minimum_size. -+ * -+ * On success, return @blk's actual length. -+ * Otherwise, return -errno. -+ */ -+static int64_t create_file_fallback_truncate(BlockBackend *blk, -+ int64_t minimum_size, Error **errp) - { -- BlockDriver *drv; -+ Error *local_err = NULL; -+ int64_t size; -+ int ret; -+ -+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); -+ if (ret < 0 && ret != -ENOTSUP) { -+ error_propagate(errp, local_err); -+ return ret; -+ } -+ -+ size = blk_getlength(blk); -+ if (size < 0) { -+ error_free(local_err); -+ error_setg_errno(errp, -size, -+ "Failed to inquire the new image file's length"); -+ return size; -+ } -+ -+ if (size < minimum_size) { -+ /* Need to grow the image, but we failed to do that */ -+ error_propagate(errp, local_err); -+ return -ENOTSUP; -+ } -+ -+ error_free(local_err); -+ local_err = NULL; -+ -+ return size; -+} -+ -+/** -+ * Helper function for bdrv_create_file_fallback(): Zero the first -+ * sector to remove any potentially pre-existing image header. 
-+ */ -+static int create_file_fallback_zero_first_sector(BlockBackend *blk, -+ int64_t current_size, -+ Error **errp) -+{ -+ int64_t bytes_to_clear; -+ int ret; -+ -+ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); -+ if (bytes_to_clear) { -+ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, -+ "Failed to clear the new image's first sector"); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -+ QemuOpts *opts, Error **errp) -+{ -+ BlockBackend *blk; -+ QDict *options = qdict_new(); -+ int64_t size = 0; -+ char *buf = NULL; -+ PreallocMode prealloc; - Error *local_err = NULL; - int ret; - -+ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); -+ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); -+ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, -+ PREALLOC_MODE_OFF, &local_err); -+ g_free(buf); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return -EINVAL; -+ } -+ -+ if (prealloc != PREALLOC_MODE_OFF) { -+ error_setg(errp, "Unsupported preallocation mode '%s'", -+ PreallocMode_str(prealloc)); -+ return -ENOTSUP; -+ } -+ -+ qdict_put_str(options, "driver", drv->format_name); -+ -+ blk = blk_new_open(filename, NULL, options, -+ BDRV_O_RDWR | BDRV_O_RESIZE, errp); -+ if (!blk) { -+ error_prepend(errp, "Protocol driver '%s' does not support image " -+ "creation, and opening the image failed: ", -+ drv->format_name); -+ return -EINVAL; -+ } -+ -+ size = create_file_fallback_truncate(blk, size, errp); -+ if (size < 0) { -+ ret = size; -+ goto out; -+ } -+ -+ ret = create_file_fallback_zero_first_sector(blk, size, errp); -+ if (ret < 0) { -+ goto out; -+ } -+ -+ ret = 0; -+out: -+ blk_unref(blk); -+ return ret; -+} -+ -+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+{ -+ BlockDriver *drv; -+ - drv = bdrv_find_protocol(filename, true, errp); - if (drv == NULL) { - return 
-ENOENT; - } - -- ret = bdrv_create(drv, filename, opts, &local_err); -- error_propagate(errp, local_err); -- return ret; -+ if (drv->bdrv_co_create_opts) { -+ return bdrv_create(drv, filename, opts, errp); -+ } else { -+ return bdrv_create_file_fallback(filename, drv, opts, errp); -+ } - } - - /** -@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - -+static QemuOptsList fallback_create_opts = { -+ .name = "fallback-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+ .desc = { -+ { -+ .name = BLOCK_OPT_SIZE, -+ .type = QEMU_OPT_SIZE, -+ .help = "Virtual disk size" -+ }, -+ { -+ .name = BLOCK_OPT_PREALLOC, -+ .type = QEMU_OPT_STRING, -+ .help = "Preallocation mode (allowed values: off)" -+ }, -+ { /* end of list */ } -+ } -+}; -+ - /* - * Common part for opening disk images and files - * -@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- if (!proto_drv->create_opts) { -- error_setg(errp, "Protocol driver '%s' does not support image creation", -- proto_drv->format_name); -- return; -- } -- - create_opts = qemu_opts_append(create_opts, drv->create_opts); -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ if (proto_drv->create_opts) { -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ } else { -+ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ } - - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); --- -1.8.3.1 - diff --git a/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch deleted file mode 100644 index 463501a..0000000 --- a/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:16 +0100 -Subject: [PATCH 
5/6] block: Increase BB.in_flight for coroutine and sync - interfaces - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-6-kwolf@redhat.com> -Patchwork-id: 94600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -External callers of blk_co_*() and of the synchronous blk_*() functions -don't currently increase the BlockBackend.in_flight counter, but calls -from blk_aio_*() do, so there is an inconsistency whether the counter -has been increased or not. - -This patch moves the actual operations to static functions that can -later know they will always be called with in_flight increased exactly -once, even for external callers using the blk_co_*() coroutine -interfaces. - -If the public blk_co_*() interface is unused, remove it. - -Signed-off-by: Kevin Wolf -Message-Id: <20200407121259.21350-3-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- - include/sysemu/block-backend.h | 1 - - 2 files changed, 80 insertions(+), 24 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 17b2e87..610dbfa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - } - } - --int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -- unsigned int bytes, QEMUIOVector *qiov, -- BdrvRequestFlags flags) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - return ret; - } - --int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -- unsigned int bytes, -- QEMUIOVector *qiov, size_t qiov_offset, -- BdrvRequestFlags flags) -+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, QEMUIOVector *qiov, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - return ret; - } - -+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ 
blk_inc_in_flight(blk); -+ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) -@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); - } -@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, -+ qiov, 0, rwco->flags); - aio_wait_kick(); - } - -@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - .ret = NOT_DONE, - }; - -+ blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); -@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } -+ blk_dec_in_flight(blk); - - return rwco.ret; - } -@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) - } - - assert(qiov->size == acb->bytes); -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); - blk_aio_complete(acb); - } -@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) - } - - assert(!qiov || qiov->size == acb->bytes); -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, -+ 
qiov, 0, rwco->flags); - blk_aio_complete(acb); - } - -@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) - bdrv_aio_cancel_async(acb); - } - --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) - { - blk_wait_while_drained(blk); - -@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, -- qiov->iov[0].iov_base); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); - } - -@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); - - blk_aio_complete(acb); - } -@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); - } - --int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - { - int ret; - -@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); - blk_aio_complete(acb); - } - -@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - cb, opaque); - } - -+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); 
-+ ret = blk_do_pdiscard(blk, offset, bytes); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_pdiscard_entry(void *opaque) - { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); - } - -@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); - } - --int blk_co_flush(BlockBackend *blk) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn blk_do_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); - -@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - blk_aio_complete(acb); - } - -@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, - return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); - } - -+int coroutine_fn blk_co_flush(BlockBackend *blk) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_flush(blk); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); - } - -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index b198dec..9bbdbd6 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - void blk_aio_cancel(BlockAIOCB *acb); - void blk_aio_cancel_async(BlockAIOCB *acb); --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); --- -1.8.3.1 - diff --git a/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch deleted file mode 100644 index 72c8986..0000000 --- a/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +++ /dev/null @@ -1,65 +0,0 @@ -From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:28 +0000 -Subject: [PATCH 08/20] block: Introduce 'bdrv_reopen_commit_post' step - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-3-kwolf@redhat.com> -Patchwork-id: 94278 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Add another step in the reopen process where driver can execute code -after permission changes are comitted. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 9 +++++++++ - include/block/block_int.h | 1 + - 2 files changed, 10 insertions(+) - -diff --git a/block.c b/block.c -index e1a4e38..a744bb5 100644 ---- a/block.c -+++ b/block.c -@@ -3657,6 +3657,15 @@ cleanup_perm: - } - } - } -+ -+ if (ret == 0) { -+ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { -+ BlockDriverState *bs = bs_entry->state.bs; -+ -+ if (bs->drv->bdrv_reopen_commit_post) -+ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); -+ } -+ } - cleanup: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (ret) { -diff --git a/include/block/block_int.h b/include/block/block_int.h -index dd033d0..c168690 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -123,6 +123,7 @@ struct BlockDriver { - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); -+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); - void (*bdrv_join_options)(QDict *options, QDict *old_options); - --- -1.8.3.1 - diff --git a/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/kvm-block-Make-bdrv_get_cumulative_perm-public.patch deleted file mode 100644 index 2f0f999..0000000 --- a/kvm-block-Make-bdrv_get_cumulative_perm-public.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:33 +0000 -Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-8-kwolf@redhat.com> -Patchwork-id: 94287 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Peter Krempa - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-2-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 6 ++---- - include/block/block_int.h | 3 +++ - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index 39e4647..354d388 100644 ---- a/block.c -+++ b/block.c -@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - bool *tighten_restrictions, Error **errp); - static void bdrv_child_abort_perm_update(BdrvChild *c); - static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm); - - typedef struct BlockReopenQueueEntry { - bool prepared; -@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, - } - } - --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm) -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm) - { - BdrvChild *c; - uint64_t cumulative_perms = 0; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index c168690..96e327b 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - void *opaque, Error **errp); - void bdrv_root_unref_child(BdrvChild *child); - -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm); -+ - /** - * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use - * bdrv_child_refresh_perms() instead and make the parent's --- -1.8.3.1 - diff --git a/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch deleted file mode 100644 index de85205..0000000 --- a/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:34 +0000 -Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-9-kwolf@redhat.com> -Patchwork-id: 94285 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -blockdev-snapshot returned an error if the overlay was already in use, -which it defined as having any BlockBackend parent. This is in fact both -too strict (some parents can tolerate the change of visible data caused -by attaching a backing file) and too loose (some non-BlockBackend -parents may not be happy with it). - -One important use case that is prevented by the too strict check is live -storage migration with blockdev-mirror. Here, the target node is -usually opened without a backing file so that the active layer is -mirrored while its backing chain can be copied in the background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client, so it needs a way to do this. - -blockdev-snapshot is the obvious way, so this patch makes it work in -this scenario. 
The new condition is that no parent uses CONSISTENT_READ -permissions. This will ensure that the operation will still be blocked -when the node is attached to the guest device, so blockdev-snapshot -remains safe. - -(For the sake of completeness, x-blockdev-reopen can be used to achieve -the same, however it is a big hammer, performs the graph change -completely unchecked and is still experimental. So even with the option -of using x-blockdev-reopen, there are reasons why blockdev-snapshot -should be able to perform this operation.) - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-3-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 14 ++++++++------ - tests/qemu-iotests/085.out | 4 ++-- - 2 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 4cd9a58..7918533 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, - TransactionAction *action = common->action; - AioContext *aio_context; - AioContext *old_context; -+ uint64_t perm, shared; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- if (bdrv_has_blk(state->new_bs)) { -+ /* -+ * Allow attaching a backing file to an overlay that's already in use only -+ * if the parents don't assume that they are already seeing a valid image. -+ * (Specifically, allow it as a mirror target, which is write-only access.) 
-+ */ -+ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); -+ if (perm & BLK_PERM_CONSISTENT_READ) { - error_setg(errp, "The overlay is already in use"); - goto out; - } - -- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, -- errp)) { -- goto out; -- } -- - if (state->new_bs->backing != NULL) { - error_setg(errp, "The overlay already has a backing image"); - goto out; -diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out -index bb50227..487d920 100644 ---- a/tests/qemu-iotests/085.out -+++ b/tests/qemu-iotests/085.out -@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - cannot create a snapshot using a file BDS === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } --{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node used as active layer === - -@@ -96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - snapshot node used as backing hd === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } --{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node has a backing image === - --- -1.8.3.1 - diff --git a/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch deleted file mode 100644 index 9d49cfa..0000000 --- a/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 
8 Apr 2020 17:29:15 +0100 -Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function - definitions - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-5-kwolf@redhat.com> -Patchwork-id: 94598 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Move all variants of the flush/pdiscard functions to a single place and -put the blk_co_*() version first because it is called by all other -variants (and will become static in the next patch). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- - 1 file changed, 46 insertions(+), 46 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 8b8f2a8..17b2e87 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, - blk_aio_write_entry, flags, cb, opaque); - } - --static void blk_aio_flush_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_flush(rwco->blk); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_flush(BlockBackend *blk, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); --} -- --static void blk_aio_pdiscard_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -- 
int64_t offset, int bytes, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -- cb, opaque); --} -- - void blk_aio_cancel(BlockAIOCB *acb) - { - bdrv_aio_cancel(acb); -@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return bdrv_co_pdiscard(blk->root, offset, bytes); - } - -+static void blk_aio_pdiscard_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -+ int64_t offset, int bytes, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -+ cb, opaque); -+} -+ -+static void blk_pdiscard_entry(void *opaque) -+{ -+ BlkRwCo *rwco = opaque; -+ QEMUIOVector *qiov = rwco->iobuf; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ aio_wait_kick(); -+} -+ -+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); -+} -+ - int blk_co_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); -@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) - return bdrv_co_flush(blk_bs(blk)); - } - -+static void blk_aio_flush_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_flush(rwco->blk); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_flush(BlockBackend *blk, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); - } - --static void 
blk_pdiscard_entry(void *opaque) --{ -- BlkRwCo *rwco = opaque; -- QEMUIOVector *qiov = rwco->iobuf; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -- aio_wait_kick(); --} -- --int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) --{ -- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); --} -- - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size) - { --- -1.8.3.1 - diff --git a/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch deleted file mode 100644 index 45f506c..0000000 --- a/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +++ /dev/null @@ -1,130 +0,0 @@ -From aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:46 +0000 -Subject: [PATCH 13/18] block/backup-top: Don't acquire context while dropping - top - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-7-slp@redhat.com> -Patchwork-id: 93759 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -All paths that lead to bdrv_backup_top_drop(), except for the call -from backup_clean(), imply that the BDS AioContext has already been -acquired, so doing it there too can potentially lead to QEMU hanging -on AIO_WAIT_WHILE(). - -An easy way to trigger this situation is by issuing a two actions -transaction, with a proper and a bogus blockdev-backup, so the second -one will trigger a rollback. 
This will trigger a hang with an stack -trace like this one: - - #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, - timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 - #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) - at /usr/include/bits/poll2.h:77 - #2 0x000055e743386e09 in qemu_poll_ns - (fds=, nfds=, timeout=) at util/qemu-timer.c:336 - #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) - at util/aio-posix.c:669 - #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 - #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 - #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 - #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 - #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 - #9 0x000055e74331461f in backup_job_create - (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 - #10 0x000055e74315bc52 in do_backup_common - (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) - at blockdev.c:3580 - #11 0x000055e74315c37c in do_blockdev_backup - (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) - at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 - #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, 
errp=0x7ffddfd1f018) - at blockdev.c:1885 - #13 0x000055e743160152 in qmp_transaction - (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 - #14 0x000055e743287ff5 in qmp_marshal_transaction - (args=, ret=, errp=0x7ffddfd1f0f8) - at qapi/qapi-commands-transaction.c:44 - #15 0x000055e74333de6c in do_qmp_dispatch - (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 - #16 0x000055e74333de6c in qmp_dispatch - (cmds=0x55e743c28d60 , request=, allow_oob=) - at qapi/qmp-dispatch.c:175 - #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) - at monitor/qmp.c:145 - #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 - #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 - #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 - #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 - #22 0x000055e743385742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #23 0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 - #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 - #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 - #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 - #29 0x000055e743016a72 in main (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by not acquiring the AioContext there, and ensuring all paths -leading to it have it already acquired (backup_clean()). 
- -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 5 ----- - block/backup.c | 3 +++ - 2 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 818d3f2..b8d863f 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -255,9 +255,6 @@ append_failed: - void bdrv_backup_top_drop(BlockDriverState *bs) - { - BDRVBackupTopState *s = bs->opaque; -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - bdrv_drained_end(bs); - - bdrv_unref(bs); -- -- aio_context_release(aio_context); - } -diff --git a/block/backup.c b/block/backup.c -index cf62b1a..1383e21 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,8 +135,11 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -+ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); - -+ aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -+ aio_context_release(aio_context); - } - - void backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch deleted file mode 100644 index 745be9f..0000000 --- a/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:31 +0000 -Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different - AioContext - -RH-Author: Kevin Wolf 
-Message-id: <20200313123439.10548-6-kwolf@redhat.com> -Patchwork-id: 94282 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP -command) to attach a node as the new backing file even if the node is in -a different AioContext than the parent if one of both nodes can be moved -to the AioContext of the other node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-3-kwolf@redhat.com> -Reviewed-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 32 ++++++++++++++++++++++++++------ - tests/qemu-iotests/245 | 8 +++----- - 2 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index a744bb5..39e4647 100644 ---- a/block.c -+++ b/block.c -@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, - *shared = cumulative_shared_perms; - } - -+static bool bdrv_reopen_can_attach(BlockDriverState *parent, -+ BdrvChild *child, -+ BlockDriverState *new_child, -+ Error **errp) -+{ -+ AioContext *parent_ctx = bdrv_get_aio_context(parent); -+ AioContext *child_ctx = bdrv_get_aio_context(new_child); -+ GSList *ignore; -+ bool ret; -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); -+ g_slist_free(ignore); -+ if (ret) { -+ return ret; -+ } -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); -+ g_slist_free(ignore); -+ return ret; -+} -+ - /* - * Take a BDRVReopenState and check if the value of 'backing' in the - * reopen_state->options QDict is valid or not. 
-@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - } - - /* -- * TODO: before removing the x- prefix from x-blockdev-reopen we -- * should move the new backing file into the right AioContext -- * instead of returning an error. -+ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in -+ * bdrv_reopen_commit() won't fail. - */ - if (new_backing_bs) { -- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { -- error_setg(errp, "Cannot use a new backing file " -- "with a different AioContext"); -+ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { - return -EINVAL; - } - } -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index f69c2fa..919131d 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): - # neither of them can switch to the other AioContext - def test_iothreads_error(self): - self.run_test_iothreads('iothread0', 'iothread1', -- "Cannot use a new backing file with a different AioContext") -+ "Cannot change iothread of active block backend") - - def test_iothreads_compatible_users(self): - self.run_test_iothreads('iothread0', 'iothread0') - - def test_iothreads_switch_backing(self): -- self.run_test_iothreads('iothread0', None, -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads('iothread0', None) - - def test_iothreads_switch_overlay(self): -- self.run_test_iothreads(None, 'iothread0', -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads(None, 'iothread0') - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], --- -1.8.3.1 - diff --git a/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch deleted file mode 100644 index 378ae1a..0000000 --- a/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 
4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:42 +0000 -Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> -Patchwork-id: 94224 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in .bdrv_close() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -When nbd_close() is called from a coroutine, the connection_co never -gets to run, and thus nbd_teardown_connection() hangs. - -This is because aio_co_enter() only puts the connection_co into the main -coroutine's wake-up queue, so this main coroutine needs to yield and -wait for connection_co to terminate. - -Suggested-by: Kevin Wolf -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-2-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/nbd.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/block/nbd.c b/block/nbd.c -index 5f18f78..a73f0d9 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { - CoMutex send_mutex; - CoQueue free_sema; - Coroutine *connection_co; -+ Coroutine *teardown_co; - QemuCoSleepState *connection_co_sleep_ns_state; - bool drained; - bool wait_drained_end; -@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - } -- BDRV_POLL_WHILE(bs, s->connection_co); -+ if (qemu_in_coroutine()) { -+ s->teardown_co = qemu_coroutine_self(); -+ /* connection_co resumes us when it terminates */ -+ qemu_coroutine_yield(); -+ s->teardown_co = NULL; -+ } else { -+ BDRV_POLL_WHILE(bs, s->connection_co); -+ } -+ assert(!s->connection_co); - } - - static bool nbd_client_connecting(BDRVNBDState *s) -@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) - s->ioc = NULL; - } - -+ if (s->teardown_co) { -+ aio_co_wake(s->teardown_co); -+ } - aio_wait_kick(); - } - --- -1.8.3.1 - diff --git a/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch deleted file mode 100644 index 43f9ffc..0000000 --- a/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:06 +0000 -Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create - -RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> -Patchwork-id: 94447 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -This will allow the 
reuse of a single generic .bdrv_co_create -implementation for several drivers. -No functional changes. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -Signed-off-by: Max Reitz -(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 3 ++- - block/crypto.c | 3 ++- - block/file-posix.c | 4 +++- - block/file-win32.c | 4 +++- - block/gluster.c | 3 ++- - block/nfs.c | 4 +++- - block/parallels.c | 3 ++- - block/qcow.c | 3 ++- - block/qcow2.c | 4 +++- - block/qed.c | 3 ++- - block/raw-format.c | 4 +++- - block/rbd.c | 3 ++- - block/sheepdog.c | 4 +++- - block/ssh.c | 4 +++- - block/vdi.c | 4 +++- - block/vhdx.c | 3 ++- - block/vmdk.c | 4 +++- - block/vpc.c | 6 ++++-- - include/block/block_int.h | 3 ++- - 19 files changed, 49 insertions(+), 20 deletions(-) - -diff --git a/block.c b/block.c -index ec29b1e..f9a1c5b 100644 ---- a/block.c -+++ b/block.c -@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) - CreateCo *cco = opaque; - assert(cco->drv); - -- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); -+ ret = cco->drv->bdrv_co_create_opts(cco->drv, -+ cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; - } -diff --git a/block/crypto.c b/block/crypto.c -index 2482383..970d463 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -539,7 +539,8 @@ fail: - return ret; - } - --static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, -+static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/file-posix.c b/block/file-posix.c -index fd29372..a2e0a74 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2346,7 +2346,9 @@ out: - return result; - } - --static int coroutine_fn 
raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/file-win32.c b/block/file-win32.c -index 77e8ff7..1585983 100644 ---- a/block/file-win32.c -+++ b/block/file-win32.c -@@ -588,7 +588,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) - return 0; - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/gluster.c b/block/gluster.c -index 4fa4a77..0aa1f2c 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1130,7 +1130,8 @@ out: - return ret; - } - --static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, -+static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/nfs.c b/block/nfs.c -index 9a6311e..cc2413d 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -662,7 +662,9 @@ out: - return ret; - } - --static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, -+static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, -+ const char *url, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/parallels.c b/block/parallels.c -index 7a01997..6d4ed77 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -609,7 +609,8 @@ exit: - goto out; - } - --static int coroutine_fn parallels_co_create_opts(const char *filename, -+static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/qcow.c b/block/qcow.c -index fce8989..8973e4e 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ 
-934,7 +934,8 @@ exit: - return ret; - } - --static int coroutine_fn qcow_co_create_opts(const char *filename, -+static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qcow2.c b/block/qcow2.c -index 83b1fc0..71067c6 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3558,7 +3558,9 @@ out: - return ret; - } - --static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qed.c b/block/qed.c -index d8c4e5f..1af9b3c 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -720,7 +720,8 @@ out: - return ret; - } - --static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, -+static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/raw-format.c b/block/raw-format.c -index 3a76ec7..93b25e1 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) - return bdrv_has_zero_init_truncate(bs->file->bs); - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - return bdrv_create_file(filename, opts, errp); -diff --git a/block/rbd.c b/block/rbd.c -index 027cbcc..8847259 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) - return qemu_rbd_do_create(options, NULL, NULL, errp); - } - --static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, -+static int coroutine_fn 
qemu_rbd_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/sheepdog.c b/block/sheepdog.c -index cfa8433..a8a7e32 100644 ---- a/block/sheepdog.c -+++ b/block/sheepdog.c -@@ -2157,7 +2157,9 @@ out: - return ret; - } - --static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn sd_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/ssh.c b/block/ssh.c -index b4375cf..84e9282 100644 ---- a/block/ssh.c -+++ b/block/ssh.c -@@ -963,7 +963,9 @@ fail: - return ret; - } - --static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/vdi.c b/block/vdi.c -index 0142da7..e1a11f2 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, - return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); - } - --static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - QDict *qdict = NULL; -diff --git a/block/vhdx.c b/block/vhdx.c -index f02d261..33e57cd 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2046,7 +2046,8 @@ delete_and_exit: - return ret; - } - --static int coroutine_fn vhdx_co_create_opts(const char *filename, -+static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/vmdk.c b/block/vmdk.c -index 20e909d..eb726f2 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2588,7 +2588,9 @@ exit: - return blk; - } - 
--static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - Error *local_err = NULL; -diff --git a/block/vpc.c b/block/vpc.c -index a655502..6df75e2 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1089,8 +1089,10 @@ out: - return ret; - } - --static int coroutine_fn vpc_co_create_opts(const char *filename, -- QemuOpts *opts, Error **errp) -+static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockdevCreateOptions *create_options = NULL; - QDict *qdict; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 96e327b..7ff81be 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -136,7 +136,8 @@ struct BlockDriver { - void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, - Error **errp); -- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, -+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp); - int (*bdrv_make_empty)(BlockDriverState *bs); --- -1.8.3.1 - diff --git a/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch b/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch deleted file mode 100644 index 2c27fd2..0000000 --- a/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +++ /dev/null @@ -1,78 +0,0 @@ -From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:29 +0000 -Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into - bdrv_reopen_commit_post - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-4-kwolf@redhat.com> -Patchwork-id: 94280 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into bdrv_reopen_commit_post 
-Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -The bitmap code requires writing the 'file' child when the qcow2 driver -is reopened in read-write mode. - -If the 'file' child is being reopened due to a permissions change, the -modification is commited yet when qcow2_reopen_commit is called. This -means that any attempt to write the 'file' child will end with EBADFD -as the original fd was already closed. - -Moving bitmap reopening to the new callback which is called after -permission modifications are commited fixes this as the file descriptor -will be replaced with the correct one. - -The above problem manifests itself when reopening 'qcow2' format layer -which uses a 'file-posix' file child which was opened with the -'auto-read-only' property set. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 7c18721..83b1fc0 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1881,6 +1881,11 @@ fail: - static void qcow2_reopen_commit(BDRVReopenState *state) - { - qcow2_update_options_commit(state->bs, state->opaque); -+ g_free(state->opaque); -+} -+ -+static void qcow2_reopen_commit_post(BDRVReopenState *state) -+{ - if (state->flags & BDRV_O_RDWR) { - Error *local_err = NULL; - -@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) - bdrv_get_node_name(state->bs)); - } - } -- g_free(state->opaque); - } - - static void qcow2_reopen_abort(BDRVReopenState *state) -@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { - .bdrv_close = qcow2_close, - .bdrv_reopen_prepare = qcow2_reopen_prepare, - .bdrv_reopen_commit = qcow2_reopen_commit, -+ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, - .bdrv_reopen_abort = qcow2_reopen_abort, - .bdrv_join_options = qcow2_join_options, - .bdrv_child_perm = bdrv_format_default_perms, --- -1.8.3.1 - diff --git a/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/kvm-block-trickle-down-the-fallback-image-creation-funct.patch deleted file mode 100644 index 5ba1521..0000000 --- a/kvm-block-trickle-down-the-fallback-image-creation-funct.patch +++ /dev/null @@ -1,296 +0,0 @@ -From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:07 +0000 -Subject: [PATCH 2/4] block: trickle down the fallback image creation function - use to the block drivers - -RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> -Patchwork-id: 94446 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Instead of checking the 
.bdrv_co_create_opts to see if we need the -fallback, just implement the .bdrv_co_create_opts in the drivers that -need it. - -This way we don't break various places that need to know if the -underlying protocol/format really supports image creation, and this way -we still allow some drivers to not support image creation. - -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 - -Note that technically this driver reverts the image creation fallback -for the vxhs driver since I don't have a means to test it, and IMHO it -is better to leave it not supported as it was prior to generic image -creation patches. - -Also drop iscsi_create_opts which was left accidentally. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and - bdrv_create_opts_simple from block.h into block_int.h] -Signed-off-by: Max Reitz -(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) - -Contextual conflicts in block.c and include/block/block_int.h - -(conflict in block.c by default shows as functional but -with --diff-algorithm=patience it becomes a contextual conflict) - -... -001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' -002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' -... -002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' - -So now running: -meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) - -shows no contextual conflicts -It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c -Thanks to Max Reitz for helping me with this. - -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 35 ++++++++++++++++++++--------------- - block/file-posix.c | 7 ++++++- - block/iscsi.c | 16 ++++------------ - block/nbd.c | 6 ++++++ - block/nvme.c | 3 +++ - include/block/block.h | 1 + - include/block/block_int.h | 11 +++++++++++ - 7 files changed, 51 insertions(+), 28 deletions(-) - -diff --git a/block.c b/block.c -index f9a1c5b..ba3b40d7 100644 ---- a/block.c -+++ b/block.c -@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, - return 0; - } - --static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -- QemuOpts *opts, Error **errp) -+/** -+ * Simple implementation of bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockBackend *blk; - QDict *options; -@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) - return -ENOENT; - } - -- if (drv->bdrv_co_create_opts) { -- return bdrv_create(drv, filename, opts, errp); -- } else { -- return bdrv_create_file_fallback(filename, drv, opts, errp); -- } -+ return bdrv_create(drv, filename, opts, errp); - } - - /** -@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - --static QemuOptsList fallback_create_opts = { -- .name = "fallback-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+QemuOptsList bdrv_create_opts_simple = { -+ .name = "simple-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, -@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- create_opts = qemu_opts_append(create_opts, drv->create_opts); -- if (proto_drv->create_opts) { -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -- } 
else { -- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ if (!proto_drv->create_opts) { -+ error_setg(errp, "Protocol driver '%s' does not support image creation", -+ proto_drv->format_name); -+ return; - } - -+ create_opts = qemu_opts_append(create_opts, drv->create_opts); -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); -diff --git a/block/file-posix.c b/block/file-posix.c -index a2e0a74..dd18d40 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -- - .bdrv_co_preadv = raw_co_preadv, - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, -@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, -diff --git 
a/block/iscsi.c b/block/iscsi.c -index b45da65..16b0716 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2399,18 +2399,6 @@ out_unlock: - return r; - } - --static QemuOptsList iscsi_create_opts = { -- .name = "iscsi-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), -- .desc = { -- { -- .name = BLOCK_OPT_SIZE, -- .type = QEMU_OPT_SIZE, -- .help = "Virtual disk size" -- }, -- { /* end of list */ } -- } --}; - - static const char *const iscsi_strong_runtime_opts[] = { - "transport", -@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -diff --git a/block/nbd.c b/block/nbd.c -index a73f0d9..927915d 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { - .protocol_name = "nbd", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { - .protocol_name = "nbd+tcp", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = 
nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { - .protocol_name = "nbd+unix", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -diff --git a/block/nvme.c b/block/nvme.c -index d41c4bd..7b7c0cc 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { - .protocol_name = "nvme", - .instance_size = sizeof(BDRVNVMeState), - -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, -+ - .bdrv_parse_filename = nvme_parse_filename, - .bdrv_file_open = nvme_file_open, - .bdrv_close = nvme_close, -diff --git a/include/block/block.h b/include/block/block.h -index 1df9848..92685d2 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); - int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); - int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); -+ - BlockDriverState *bdrv_new(void); - void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 7ff81be..529f153 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, - - int refresh_total_sectors(BlockDriverState *bs, int64_t hint); - -+/** -+ * Simple implementation of 
bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp); -+extern QemuOptsList bdrv_create_opts_simple; -+ - #endif /* BLOCK_INT_H */ --- -1.8.3.1 - diff --git a/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch deleted file mode 100644 index 9a69130..0000000 --- a/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +++ /dev/null @@ -1,176 +0,0 @@ -From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:47 +0000 -Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-8-slp@redhat.com> -Patchwork-id: 93760 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Dirty map addition and removal functions are not acquiring to BDS -AioContext, while they may call to code that expects it to be -acquired. 
- -This may trigger a crash with a stack trace like this one: - - #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) - at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 - #2 0x0000565022294dce in error_exit - (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 - #3 0x00005650222950ba in qemu_mutex_unlock_impl - (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 - #4 0x0000565022290029 in aio_context_release - (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 - #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap - (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) - at block/dirty-bitmap.c:542 - #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 - #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 - #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add - (args=, ret=, errp=0x7fff22831798) - at qapi/qapi-commands-block-core.c:651 - #9 0x0000565022247e6c in do_qmp_dispatch - (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 - #10 0x0000565022247e6c in qmp_dispatch - (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #11 0x0000565022166061 in monitor_qmp_dispatch - (mon=0x56502450faa0, req=) at monitor/qmp.c:145 - #12 0x00005650221666fa in monitor_qmp_bh_dispatcher - (data=) at monitor/qmp.c:234 - #13 0x000056502228f866 in aio_bh_call (bh=0x56502440eae0) - 
at util/async.c:117 - #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) - at util/async.c:117 - #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) - at util/aio-posix.c:459 - #16 0x000056502228f742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) - at gmain.c:3176 - #18 0x00007f0ef5ce667d in g_main_context_dispatch - (context=context@entry=0x56502449aa40) at gmain.c:3829 - #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 - #20 0x0000565022291d08 in os_host_main_loop_wait - (timeout=) at util/main-loop.c:242 - #21 0x0000565022291d08 in main_loop_wait (nonblocking=) - at util/main-loop.c:518 - #22 0x00005650220743c1 in main_loop () at vl.c:1828 - #23 0x0000565021f20a72 in main - (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() -and qmp_block_dirty_bitmap_add(). - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 22 ++++++++++++++++++---- - 1 file changed, 18 insertions(+), 4 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 1dacbc2..d4ef6cd 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - return; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (has_granularity) { - if (granularity < 512 || !is_power_of_2(granularity)) { - error_setg(errp, "Granularity must be power of 2 " - "and at least 512"); -- return; -+ goto out; - } - } else { - /* Default to cluster size, if available: */ -@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - if (persistent && - !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) - { -- return; -+ goto out; - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- return; -+ goto out; - } - - if (disabled) { -@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -+ -+out: -+ aio_context_release(aio_context); - } - - static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); - if (!bitmap || !bs) { - return NULL; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -+ aio_context_release(aio_context); - 
return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap) && - bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) - { -- return NULL; -+ aio_context_release(aio_context); -+ return NULL; - } - - if (release) { -@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - *bitmap_bs = bs; - } - -+ aio_context_release(aio_context); - return release ? NULL : bitmap; - } - --- -1.8.3.1 - diff --git a/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch deleted file mode 100644 index b2dd453..0000000 --- a/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:48 +0000 -Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot - abort - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-9-slp@redhat.com> -Patchwork-id: 93761 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot abort -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -external_snapshot_abort() calls to bdrv_set_backing_hd(), which -returns state->old_bs to the main AioContext, as it's intended to be -used then the BDS is going to be released. As that's not the case when -aborting an external snapshot, return it to the AioContext it was -before the call. - -This issue can be triggered by issuing a transaction with two actions, -a proper blockdev-snapshot-sync and a bogus one, so the second will -trigger a transaction abort. 
This results in a crash with an stack -trace like this one: - - #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 - #2 0x00007fa10489cbc9 in __assert_fail_base - (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 - #3 0x00007fa1048aae96 in __GI___assert_fail - (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 - #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 - #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 - #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 - #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 - #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 - #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 - #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 - #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 - #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 - #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 - #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 - #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 - #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 - #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 - #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 - #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 - #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #23 0x0000557223d13201 in main_loop () at vl.c:1828 - #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/blockdev.c b/blockdev.c -index d4ef6cd..4cd9a58 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) - if (state->new_bs) { - if (state->overlay_appended) { - AioContext *aio_context; -+ AioContext *tmp_context; -+ int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); - aio_context_acquire(aio_context); -@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ - bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); -+ -+ /* -+ * The call to bdrv_set_backing_hd() above returns state->old_bs to -+ * the main AioContext. 
As we're still going to be using it, return -+ * it to the AioContext it was before. -+ */ -+ tmp_context = bdrv_get_aio_context(state->old_bs); -+ if (aio_context != tmp_context) { -+ aio_context_release(aio_context); -+ aio_context_acquire(tmp_context); -+ -+ ret = bdrv_try_set_aio_context(state->old_bs, -+ aio_context, NULL); -+ assert(ret == 0); -+ -+ aio_context_release(tmp_context); -+ aio_context_acquire(aio_context); -+ } -+ - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ - --- -1.8.3.1 - diff --git a/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch deleted file mode 100644 index 399a06a..0000000 --- a/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +++ /dev/null @@ -1,62 +0,0 @@ -From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:41 +0000 -Subject: [PATCH 08/18] blockdev: fix coding style issues in - drive_backup_prepare -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-2-slp@redhat.com> -Patchwork-id: 93754 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Fix a couple of minor coding style issues in drive_backup_prepare. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 8e029e9..553e315 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - if (!backup->has_format) { - backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char*) bs->drv->format_name; -+ NULL : (char *) bs->drv->format_name; - } - - /* Early check to avoid creating target */ -@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - flags = bs->open_flags | BDRV_O_RDWR; - -- /* See if we have a backing HD we can use to create our new image -- * on top of. */ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ - if (backup->sync == MIRROR_SYNC_MODE_TOP) { - source = backing_bs(bs); - if (!source) { --- -1.8.3.1 - diff --git a/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch deleted file mode 100644 index a94ee75..0000000 --- a/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +++ /dev/null @@ -1,204 +0,0 @@ -From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:44 +0000 -Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context - requirements - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-5-slp@redhat.com> -Patchwork-id: 93758 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_try_set_aio_context() requires that the old context is held, and -the new context is not held. 
Fix all the occurrences where it's not -done this way. - -Suggested-by: Max Reitz -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 60 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 152a0f7..1dacbc2 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -+ AioContext *old_context; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(state->new_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } -@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -+ AioContext *old_context; - QDict *options; - Error *local_err = NULL; - int flags; - int64_t size; - bool set_backing_hd = false; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; -@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ bdrv_unref(target_bs); -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (set_backing_hd) { - bdrv_set_backing_hd(target_bs, source, &local_err); - if (local_err) { -@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ - aio_context = bdrv_get_aio_context(bs); -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); - aio_context_acquire(aio_context); - state->bs = bs; - -@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - BlockJob *job = NULL; - BdrvDirtyBitmap *bmap = NULL; - int job_flags = JOB_DEFAULT; -- int ret; - - if (!backup->has_speed) { - backup->speed = 0; -@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - backup->compress = false; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- return NULL; -- } -- - if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || - (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { - /* done before desugaring 'incremental' to print the right message */ -@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *source, *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - (arg->mode == NEW_IMAGE_MODE_EXISTING || - !bdrv_has_zero_init(target_bs))); - -+ -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- goto out; -+ aio_context_release(old_context); -+ return; - } - -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, - arg->has_replaces, arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - Error *local_err = NULL; - bool zero_target; -@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -+ aio_context_acquire(old_context); - - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } --- -1.8.3.1 - diff --git a/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch deleted file mode 100644 index c426384..0000000 --- a/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:43 +0000 -Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-4-slp@redhat.com> -Patchwork-id: 93756 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a blockdev-backup from qmp_blockdev_backup takes 
a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_blockdev_backup() and -blockdev_backup_prepare(). - -This change unifies both paths, merging do_blockdev_backup() and -blockdev_backup_prepare(), and changing qmp_blockdev_backup() to -create a transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_blockdev_backup() is executed inside a -drained section, as it happens when creating a blockdev-backup -transaction. This change is visible from the user's perspective, as -the job gets paused and immediately resumed before starting the actual -work. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 60 +++++++++++++----------------------------------------------- - 1 file changed, 13 insertions(+), 47 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 5e85fc0..152a0f7 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { - BlockJob *job; - } BlockdevBackupState; - --static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp); -- - static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; -- BlockDriverState *bs, *target; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; - AioContext *aio_context; -- Error *local_err = NULL; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -- target = bdrv_lookup_bs(backup->target, backup->target, errp); -- if 
(!target) { -+ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -+ if (!target_bs) { - return; - } - -@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - /* Paired with .clean() */ - bdrv_drained_begin(state->bs); - -- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -+ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); - --out: - aio_context_release(aio_context); - } - -@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) - return bdrv_get_xdbg_block_graph(errp); - } - --BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp) -+void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) - { -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- AioContext *aio_context; -- BlockJob *job; -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -- if (!target_bs) { -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- job = do_backup_common(qapi_BlockdevBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- -- aio_context_release(aio_context); -- return job; --} -- --void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) --{ -- BlockJob *job; -- job = do_blockdev_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, -+ .u.blockdev_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - /* Parameter check and block job starting for drive mirroring. 
--- -1.8.3.1 - diff --git a/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch deleted file mode 100644 index 9ec1975..0000000 --- a/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +++ /dev/null @@ -1,419 +0,0 @@ -From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:42 +0000 -Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-3-slp@redhat.com> -Patchwork-id: 93755 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a drive-backup from qmp_drive_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_drive_backup() and -drive_backup_prepare(). - -This change unifies both paths, merging do_drive_backup() and -drive_backup_prepare(), and changing qmp_drive_backup() to create a -transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_drive_backup() is executed inside a drained -section, as it happens when creating a drive-backup transaction. This -change is visible from the user's perspective, as the job gets paused -and immediately resumed before starting the actual work. - -Also fix tests 141, 185 and 219 to cope with the extra -JOB_STATUS_CHANGE lines. - -Signed-off-by: Sergio Lopez -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 224 ++++++++++++++++++++------------------------- - tests/qemu-iotests/141.out | 2 + - tests/qemu-iotests/185.out | 2 + - tests/qemu-iotests/219 | 7 +- - tests/qemu-iotests/219.out | 8 ++ - 5 files changed, 117 insertions(+), 126 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 553e315..5e85fc0 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { - BlockJob *job; - } DriveBackupState; - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp); -+static BlockJob *do_backup_common(BackupCommon *backup, -+ BlockDriverState *bs, -+ BlockDriverState *target_bs, -+ AioContext *aio_context, -+ JobTxn *txn, Error **errp); - - static void drive_backup_prepare(BlkActionState *common, Error **errp) - { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); -- BlockDriverState *bs; - DriveBackup *backup; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; -+ BlockDriverState *source = NULL; - AioContext *aio_context; -+ QDict *options; - Error *local_err = NULL; -+ int flags; -+ int64_t size; -+ bool set_backing_hd = false; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; - -+ if (!backup->has_mode) { -+ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -+ } -+ - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return; - } - -+ if (!bs->drv) { -+ error_setg(errp, "Device has no medium"); -+ return; -+ } -+ - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - -- state->bs = bs; -+ if (!backup->has_format) { -+ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
-+ NULL : (char *) bs->drv->format_name; -+ } -+ -+ /* Early check to avoid creating target */ -+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -+ goto out; -+ } -+ -+ flags = bs->open_flags | BDRV_O_RDWR; -+ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ -+ if (backup->sync == MIRROR_SYNC_MODE_TOP) { -+ source = backing_bs(bs); -+ if (!source) { -+ backup->sync = MIRROR_SYNC_MODE_FULL; -+ } -+ } -+ if (backup->sync == MIRROR_SYNC_MODE_NONE) { -+ source = bs; -+ flags |= BDRV_O_NO_BACKING; -+ set_backing_hd = true; -+ } -+ -+ size = bdrv_getlength(bs); -+ if (size < 0) { -+ error_setg_errno(errp, -size, "bdrv_getlength failed"); -+ goto out; -+ } -+ -+ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -+ assert(backup->format); -+ if (source) { -+ bdrv_refresh_filename(source); -+ bdrv_img_create(backup->target, backup->format, source->filename, -+ source->drv->format_name, NULL, -+ size, flags, false, &local_err); -+ } else { -+ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -+ size, flags, false, &local_err); -+ } -+ } - -- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto out; - } - -+ options = qdict_new(); -+ qdict_put_str(options, "discard", "unmap"); -+ qdict_put_str(options, "detect-zeroes", "unmap"); -+ if (backup->format) { -+ qdict_put_str(options, "driver", backup->format); -+ } -+ -+ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -+ if (!target_bs) { -+ goto out; -+ } -+ -+ if (set_backing_hd) { -+ bdrv_set_backing_hd(target_bs, source, &local_err); -+ if (local_err) { -+ goto unref; -+ } -+ } -+ -+ state->bs = bs; -+ -+ state->job = do_backup_common(qapi_DriveBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); -+ -+unref: -+ bdrv_unref(target_bs); - out: - aio_context_release(aio_context); - } -@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, - return job; - } - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp) --{ -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- BlockDriverState *source = NULL; -- BlockJob *job = NULL; -- AioContext *aio_context; -- QDict *options; -- Error *local_err = NULL; -- int flags; -- int64_t size; -- bool set_backing_hd = false; -- -- if (!backup->has_mode) { -- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -- } -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- if (!bs->drv) { -- error_setg(errp, "Device has no medium"); -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (!backup->has_format) { -- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char *) bs->drv->format_name; -- } -- -- /* Early check to avoid creating target */ -- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -- goto out; -- } -- -- flags = bs->open_flags | BDRV_O_RDWR; -- -- /* -- * See if we have a backing HD we can use to create our new image -- * on top of. 
-- */ -- if (backup->sync == MIRROR_SYNC_MODE_TOP) { -- source = backing_bs(bs); -- if (!source) { -- backup->sync = MIRROR_SYNC_MODE_FULL; -- } -- } -- if (backup->sync == MIRROR_SYNC_MODE_NONE) { -- source = bs; -- flags |= BDRV_O_NO_BACKING; -- set_backing_hd = true; -- } -- -- size = bdrv_getlength(bs); -- if (size < 0) { -- error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -- } -- -- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -- assert(backup->format); -- if (source) { -- bdrv_refresh_filename(source); -- bdrv_img_create(backup->target, backup->format, source->filename, -- source->drv->format_name, NULL, -- size, flags, false, &local_err); -- } else { -- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -- size, flags, false, &local_err); -- } -- } -- -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -- -- options = qdict_new(); -- qdict_put_str(options, "discard", "unmap"); -- qdict_put_str(options, "detect-zeroes", "unmap"); -- if (backup->format) { -- qdict_put_str(options, "driver", backup->format); -- } -- -- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- if (!target_bs) { -- goto out; -- } -- -- if (set_backing_hd) { -- bdrv_set_backing_hd(target_bs, source, &local_err); -- if (local_err) { -- goto unref; -- } -- } -- -- job = do_backup_common(qapi_DriveBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- --unref: -- bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); -- return job; --} -- --void qmp_drive_backup(DriveBackup *arg, Error **errp) -+void qmp_drive_backup(DriveBackup *backup, Error **errp) - { -- -- BlockJob *job; -- job = do_drive_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, -+ .u.drive_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
-diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 3645675..263b680 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. - Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} - {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} - {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index 8379ac5..9a3b657 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l - Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } - {"return": {}} -diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 -index e0c5166..655f54d 100755 ---- a/tests/qemu-iotests/219 -+++ b/tests/qemu-iotests/219 -@@ -63,7 +63,7 @@ def test_pause_resume(vm): - # logged immediately - iotests.log(vm.qmp('query-jobs')) - --def test_job_lifecycle(vm, job, job_args, has_ready=False): -+def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): - global img_size - - iotests.log('') -@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): - iotests.log('Waiting for PENDING state...') - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ if is_mirror: -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - - if not job_args.get('auto-finalize', True): - # PENDING state: -@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ - - for auto_finalize in [True, False]: - for auto_dismiss in [True, False]: -- test_job_lifecycle(vm, 'drive-backup', job_args={ -+ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ - 'device': 'drive0-node', - 'target': copy_path, - 'sync': 'full', -diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out -index 8ebd3fe..0ea5d0b 100644 ---- a/tests/qemu-iotests/219.out -+++ b/tests/qemu-iotests/219.out -@@ -135,6 +135,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -186,6 +188,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -245,6 +249,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} -@@ -304,6 +310,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} --- -1.8.3.1 - diff --git a/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch deleted file mode 100644 index 5d21bf8..0000000 --- a/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +++ /dev/null @@ -1,137 +0,0 @@ -From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:38 +0100 -Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-4-dgilbert@redhat.com> -Patchwork-id: 93459 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Paolo Bonzini - -Since we are actually testing for the newer capng library, rename the -symbol to match. - -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Paolo Bonzini -(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) -Signed-off-by: Miroslav Rezanina ---- - configure | 2 +- - qemu-bridge-helper.c | 6 +++--- - scsi/qemu-pr-helper.c | 12 ++++++------ - 3 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/configure b/configure -index 16564f8..7831618 100755 ---- a/configure -+++ b/configure -@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then - echo "CONFIG_L2TPV3=y" >> $config_host_mak - fi - if test "$cap_ng" = "yes" ; then -- echo "CONFIG_LIBCAP=y" >> $config_host_mak -+ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak - fi - echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak - for drv in $audio_drv_list; do -diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c -index 3d50ec0..88b2674 100644 ---- a/qemu-bridge-helper.c -+++ b/qemu-bridge-helper.c -@@ -43,7 +43,7 @@ - - #include "net/tap-linux.h" - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - -@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) - return sendmsg(c, &msg, 0); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -246,7 +246,7 @@ int main(int argc, char **argv) - int access_allowed, 
access_denied; - int ret = EXIT_SUCCESS; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - /* if we're run from an suid binary, immediately drop privileges preserving - * cap_net_admin */ - if (geteuid() == 0 && getuid() != geteuid()) { -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index debb18f..0659cee 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -24,7 +24,7 @@ - #include - #include - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - #include -@@ -70,7 +70,7 @@ static int num_active_sockets = 1; - static int noisy; - static int verbose; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int uid = -1; - static int gid = -1; - #endif -@@ -97,7 +97,7 @@ static void usage(const char *name) - " (default '%s')\n" - " -T, --trace [[enable=]][,events=][,file=]\n" - " specify tracing options\n" --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - " -u, --user=USER user to drop privileges to\n" - " -g, --group=GROUP group to drop privileges to\n" - #endif -@@ -827,7 +827,7 @@ static void close_server_socket(void) - num_active_sockets--; - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -920,7 +920,7 @@ int main(int argc, char **argv) - pidfile = g_strdup(optarg); - pidfile_specified = true; - break; --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - case 'u': { - unsigned long res; - struct passwd *userinfo = getpwnam(optarg); -@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) - exit(EXIT_FAILURE); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - if (drop_privileges() < 0) { - error_report("Failed to drop privileges: %s", strerror(errno)); - exit(EXIT_FAILURE); --- -1.8.3.1 - diff --git a/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch deleted file mode 100644 index 5b1b170..0000000 --- a/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +++ 
/dev/null @@ -1,2463 +0,0 @@ -From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 18 Mar 2020 18:10:40 +0000 -Subject: [PATCH 2/2] build-sys: do not make qemu-ga link with pixman -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> -Patchwork-id: 94381 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman -Bugzilla: 1811670 -RH-Acked-by: Markus Armbruster -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange - -Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add -vhost-user-gpu"), qemu-ga is linking with pixman. - -This is because the Make-based build-system use a global namespace for -variables, and we rely on "main.o-libs" for different linking targets. - -Note: this kind of variable clashing is hard to fix or prevent -currently. meson should help, as declarations have a linear -dependency and doesn't rely so much on variables and clever tricks. - -Note2: we have a lot of main.c (or other duplicated names!) in -tree. Imho, it would be annoying and a bad workaroud to rename all -those to avoid conflicts like I did here. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 - -Signed-off-by: Marc-André Lureau -Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> -Signed-off-by: Paolo Bonzini - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 - -(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/vhost-user-gpu/Makefile.objs | 6 +- - contrib/vhost-user-gpu/main.c | 1191 ------------------------------- - contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ - 3 files changed, 1194 insertions(+), 1194 deletions(-) - delete mode 100644 contrib/vhost-user-gpu/main.c - create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c - -diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs -index 6170c91..0929609 100644 ---- a/contrib/vhost-user-gpu/Makefile.objs -+++ b/contrib/vhost-user-gpu/Makefile.objs -@@ -1,7 +1,7 @@ --vhost-user-gpu-obj-y = main.o virgl.o vugbm.o -+vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o - --main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) --main.o-libs := $(PIXMAN_LIBS) -+vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) -+vhost-user-gpu.o-libs := $(PIXMAN_LIBS) - - virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) - virgl.o-libs := $(VIRGL_LIBS) -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -deleted file mode 100644 -index b45d201..0000000 ---- a/contrib/vhost-user-gpu/main.c -+++ /dev/null -@@ -1,1191 +0,0 @@ --/* -- * Virtio vhost-user GPU Device -- * -- * Copyright Red Hat, Inc. 2013-2018 -- * -- * Authors: -- * Dave Airlie -- * Gerd Hoffmann -- * Marc-André Lureau -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. 
-- */ --#include "qemu/osdep.h" --#include "qemu/drm.h" --#include "qapi/error.h" --#include "qemu/sockets.h" -- --#include --#include -- --#include "vugpu.h" --#include "hw/virtio/virtio-gpu-bswap.h" --#include "hw/virtio/virtio-gpu-pixman.h" --#include "virgl.h" --#include "vugbm.h" -- --enum { -- VHOST_USER_GPU_MAX_QUEUES = 2, --}; -- --struct virtio_gpu_simple_resource { -- uint32_t resource_id; -- uint32_t width; -- uint32_t height; -- uint32_t format; -- struct iovec *iov; -- unsigned int iov_cnt; -- uint32_t scanout_bitmask; -- pixman_image_t *image; -- struct vugbm_buffer buffer; -- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; --}; -- --static gboolean opt_print_caps; --static int opt_fdnum = -1; --static char *opt_socket_path; --static char *opt_render_node; --static gboolean opt_virgl; -- --static void vg_handle_ctrl(VuDev *dev, int qidx); -- --static const char * --vg_cmd_to_string(int cmd) --{ --#define CMD(cmd) [cmd] = #cmd -- static const char *vg_cmd_str[] = { -- CMD(VIRTIO_GPU_UNDEFINED), -- -- /* 2d commands */ -- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET), -- -- /* 3d commands */ -- CMD(VIRTIO_GPU_CMD_CTX_CREATE), -- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -- -- /* cursor commands */ -- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -- }; --#undef REQ -- -- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -- return 
vg_cmd_str[cmd]; -- } else { -- return "unknown"; -- } --} -- --static int --vg_sock_fd_read(int sock, void *buf, ssize_t buflen) --{ -- int ret; -- -- do { -- ret = read(sock, buf, buflen); -- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --static void --vg_sock_fd_close(VuGpu *g) --{ -- if (g->sock_fd >= 0) { -- close(g->sock_fd); -- g->sock_fd = -1; -- } --} -- --static gboolean --source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) --{ -- VuGpu *g = user_data; -- -- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -- return G_SOURCE_CONTINUE; -- } -- -- /* resume */ -- g->wait_ok = 0; -- vg_handle_ctrl(&g->dev.parent, 0); -- -- return G_SOURCE_REMOVE; --} -- --void --vg_wait_ok(VuGpu *g) --{ -- assert(g->wait_ok == 0); -- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -- source_wait_cb, g); --} -- --static int --vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) --{ -- ssize_t ret; -- struct iovec iov = { -- .iov_base = (void *)buf, -- .iov_len = buflen, -- }; -- struct msghdr msg = { -- .msg_iov = &iov, -- .msg_iovlen = 1, -- }; -- union { -- struct cmsghdr cmsghdr; -- char control[CMSG_SPACE(sizeof(int))]; -- } cmsgu; -- struct cmsghdr *cmsg; -- -- if (fd != -1) { -- msg.msg_control = cmsgu.control; -- msg.msg_controllen = sizeof(cmsgu.control); -- -- cmsg = CMSG_FIRSTHDR(&msg); -- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); -- cmsg->cmsg_level = SOL_SOCKET; -- cmsg->cmsg_type = SCM_RIGHTS; -- -- *((int *)CMSG_DATA(cmsg)) = fd; -- } -- -- do { -- ret = sendmsg(sock, &msg, 0); -- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --void --vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) --{ -- if (vg_sock_fd_write(vg->sock_fd, msg, -- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -- vg_sock_fd_close(vg); -- } --} -- --bool --vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, -- gpointer payload) --{ -- uint32_t req, flags, size; -- -- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -- goto err; -- } -- -- g_return_val_if_fail(req == expect_req, false); -- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -- g_return_val_if_fail(size == expect_size, false); -- -- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -- goto err; -- } -- -- return true; -- --err: -- vg_sock_fd_close(g); -- return false; --} -- --static struct virtio_gpu_simple_resource * --virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) --{ -- struct virtio_gpu_simple_resource *res; -- -- QTAILQ_FOREACH(res, &g->reslist, next) { -- if (res->resource_id == resource_id) { -- return res; -- } -- } -- return NULL; --} -- --void --vg_ctrl_response(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- struct virtio_gpu_ctrl_hdr *resp, -- size_t resp_len) --{ -- size_t s; -- -- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -- resp->flags |= VIRTIO_GPU_FLAG_FENCE; -- resp->fence_id = cmd->cmd_hdr.fence_id; -- resp->ctx_id = cmd->cmd_hdr.ctx_id; -- } -- virtio_gpu_ctrl_hdr_bswap(resp); -- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -- if (s != resp_len) { -- g_critical("%s: response size incorrect %zu vs %zu", -- __func__, s, resp_len); -- } -- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -- vu_queue_notify(&g->dev.parent, cmd->vq); -- cmd->finished = true; --} -- --void --vg_ctrl_response_nodata(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- enum virtio_gpu_ctrl_type type) --{ -- struct virtio_gpu_ctrl_hdr resp = { -- .type = type, -- }; -- -- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); --} -- --void --vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
-- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -- .size = 0, -- }; -- -- assert(vg->wait_ok == 0); -- -- vg_send_msg(vg, &msg, -1); -- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -- return; -- } -- -- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); --} -- --static void --vg_resource_create_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- pixman_format_code_t pformat; -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_create_2d c2d; -- -- VUGPU_FILL_CMD(c2d); -- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -- -- if (c2d.resource_id == 0) { -- g_critical("%s: resource id 0 is not allowed", __func__); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = virtio_gpu_find_resource(g, c2d.resource_id); -- if (res) { -- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = g_new0(struct virtio_gpu_simple_resource, 1); -- res->width = c2d.width; -- res->height = c2d.height; -- res->format = c2d.format; -- res->resource_id = c2d.resource_id; -- -- pformat = virtio_gpu_get_pixman_format(c2d.format); -- if (!pformat) { -- g_critical("%s: host couldn't handle guest format %d", -- __func__, c2d.format); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -- res->image = pixman_image_create_bits(pformat, -- c2d.width, -- c2d.height, -- (uint32_t *)res->buffer.mmap, -- res->buffer.stride); -- if (!res->image) { -- g_critical("%s: resource creation failed %d %d %d", -- __func__, c2d.resource_id, c2d.width, c2d.height); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -- return; -- } -- -- QTAILQ_INSERT_HEAD(&g->reslist, res, next); --} -- --static void --vg_disable_scanout(VuGpu *g, int scanout_id) --{ -- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; -- struct virtio_gpu_simple_resource *res; -- -- if (scanout->resource_id == 0) { -- return; -- } -- -- res = virtio_gpu_find_resource(g, scanout->resource_id); -- if (res) { -- res->scanout_bitmask &= ~(1 << scanout_id); -- } -- -- scanout->width = 0; -- scanout->height = 0; -- -- if (g->sock_fd >= 0) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout.scanout_id = scanout_id, -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_destroy(VuGpu *g, -- struct virtio_gpu_simple_resource *res) --{ -- int i; -- -- if (res->scanout_bitmask) { -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- if (res->scanout_bitmask & (1 << i)) { -- vg_disable_scanout(g, i); -- } -- } -- } -- -- vugbm_buffer_destroy(&res->buffer); -- pixman_image_unref(res->image); -- QTAILQ_REMOVE(&g->reslist, res, next); -- g_free(res); --} -- --static void --vg_resource_unref(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_unref unref; -- -- VUGPU_FILL_CMD(unref); -- virtio_gpu_bswap_32(&unref, sizeof(unref)); -- -- res = virtio_gpu_find_resource(g, unref.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, unref.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- vg_resource_destroy(g, res); --} -- --int --vg_create_mapping_iov(VuGpu *g, -- struct virtio_gpu_resource_attach_backing *ab, -- struct virtio_gpu_ctrl_command *cmd, -- struct iovec **iov) --{ -- struct virtio_gpu_mem_entry *ents; -- size_t esize, s; -- int i; -- -- if (ab->nr_entries > 16384) { -- g_critical("%s: nr_entries is too big (%d > 16384)", -- __func__, ab->nr_entries); -- return -1; -- } -- -- esize = sizeof(*ents) * ab->nr_entries; -- ents = g_malloc(esize); -- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- sizeof(*ab), ents, esize); -- if (s 
!= esize) { -- g_critical("%s: command data size incorrect %zu vs %zu", -- __func__, s, esize); -- g_free(ents); -- return -1; -- } -- -- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -- for (i = 0; i < ab->nr_entries; i++) { -- uint64_t len = ents[i].length; -- (*iov)[i].iov_len = ents[i].length; -- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -- if (!(*iov)[i].iov_base || len != ents[i].length) { -- g_critical("%s: resource %d element %d", -- __func__, ab->resource_id, i); -- g_free(*iov); -- g_free(ents); -- *iov = NULL; -- return -1; -- } -- } -- g_free(ents); -- return 0; --} -- --static void --vg_resource_attach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_attach_backing ab; -- int ret; -- -- VUGPU_FILL_CMD(ab); -- virtio_gpu_bswap_32(&ab, sizeof(ab)); -- -- res = virtio_gpu_find_resource(g, ab.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ab.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -- if (ret != 0) { -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- return; -- } -- -- res->iov_cnt = ab.nr_entries; --} -- --static void --vg_resource_detach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_detach_backing detach; -- -- VUGPU_FILL_CMD(detach); -- virtio_gpu_bswap_32(&detach, sizeof(detach)); -- -- res = virtio_gpu_find_resource(g, detach.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, detach.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- g_free(res->iov); -- res->iov = NULL; -- res->iov_cnt = 0; --} -- --static void --vg_transfer_to_host_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct 
virtio_gpu_simple_resource *res; -- int h; -- uint32_t src_offset, dst_offset, stride; -- int bpp; -- pixman_format_code_t format; -- struct virtio_gpu_transfer_to_host_2d t2d; -- -- VUGPU_FILL_CMD(t2d); -- virtio_gpu_t2d_bswap(&t2d); -- -- res = virtio_gpu_find_resource(g, t2d.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, t2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (t2d.r.x > res->width || -- t2d.r.y > res->height || -- t2d.r.width > res->width || -- t2d.r.height > res->height || -- t2d.r.x + t2d.r.width > res->width || -- t2d.r.y + t2d.r.height > res->height) { -- g_critical("%s: transfer bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d", -- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -- t2d.r.width, t2d.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- format = pixman_image_get_format(res->image); -- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -- stride = pixman_image_get_stride(res->image); -- -- if (t2d.offset || t2d.r.x || t2d.r.y || -- t2d.r.width != pixman_image_get_width(res->image)) { -- void *img_data = pixman_image_get_data(res->image); -- for (h = 0; h < t2d.r.height; h++) { -- src_offset = t2d.offset + stride * h; -- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -- -- iov_to_buf(res->iov, res->iov_cnt, src_offset, -- img_data -- + dst_offset, t2d.r.width * bpp); -- } -- } else { -- iov_to_buf(res->iov, res->iov_cnt, 0, -- pixman_image_get_data(res->image), -- pixman_image_get_stride(res->image) -- * pixman_image_get_height(res->image)); -- } --} -- --static void --vg_set_scanout(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res, *ores; -- struct virtio_gpu_scanout *scanout; -- struct virtio_gpu_set_scanout ss; -- int fd; -- -- VUGPU_FILL_CMD(ss); -- virtio_gpu_bswap_32(&ss, sizeof(ss)); -- -- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -- g_critical("%s: illegal scanout id specified %d", -- __func__, ss.scanout_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -- return; -- } -- -- if (ss.resource_id == 0) { -- vg_disable_scanout(g, ss.scanout_id); -- return; -- } -- -- /* create a surface for this scanout */ -- res = virtio_gpu_find_resource(g, ss.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ss.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (ss.r.x > res->width || -- ss.r.y > res->height || -- ss.r.width > res->width || -- ss.r.height > res->height || -- ss.r.x + ss.r.width > res->width || -- ss.r.y + ss.r.height > res->height) { -- g_critical("%s: illegal scanout %d bounds for" -- " resource %d, (%d,%d)+%d,%d vs %d %d", -- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -- ss.r.width, ss.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- scanout = &g->scanout[ss.scanout_id]; -- -- ores = virtio_gpu_find_resource(g, scanout->resource_id); -- if (ores) { -- ores->scanout_bitmask &= ~(1 << ss.scanout_id); -- } -- -- res->scanout_bitmask |= (1 << ss.scanout_id); -- scanout->resource_id = ss.resource_id; -- scanout->x = ss.r.x; -- scanout->y = ss.r.y; -- scanout->width = ss.r.width; -- scanout->height = ss.r.height; -- -- struct vugbm_buffer *buffer = &res->buffer; -- -- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_DMABUF_SCANOUT, -- .size = sizeof(VhostUserGpuDMABUFScanout), -- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -- .scanout_id = ss.scanout_id, -- .x = ss.r.x, -- .y = ss.r.y, -- .width = ss.r.width, -- .height = ss.r.height, -- .fd_width = buffer->width, -- .fd_height = buffer->height, -- .fd_stride = buffer->stride, -- .fd_drm_fourcc = buffer->format -- } -- }; -- -- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -- vg_send_msg(g, &msg, fd); -- close(fd); -- } -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout = (VhostUserGpuScanout) { -- .scanout_id = ss.scanout_id, -- .width = scanout->width, -- .height = scanout->height -- } -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_flush(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_flush rf; -- pixman_region16_t flush_region; -- int i; -- -- VUGPU_FILL_CMD(rf); -- virtio_gpu_bswap_32(&rf, sizeof(rf)); -- -- res = virtio_gpu_find_resource(g, rf.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d\n", -- __func__, rf.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (rf.r.x > res->width || -- rf.r.y > res->height || -- rf.r.width > res->width || -- rf.r.height > res->height || -- rf.r.x + rf.r.width > res->width || -- rf.r.y + rf.r.height > res->height) { -- g_critical("%s: flush bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d\n", -- __func__, rf.resource_id, rf.r.x, rf.r.y, -- rf.r.width, rf.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- pixman_region_init_rect(&flush_region, -- rf.r.x, rf.r.y, rf.r.width, rf.r.height); -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- struct virtio_gpu_scanout *scanout; -- pixman_region16_t region, finalregion; -- pixman_box16_t *extents; -- -- if (!(res->scanout_bitmask & (1 << i))) { -- continue; -- } -- scanout = &g->scanout[i]; -- -- pixman_region_init(&finalregion); -- pixman_region_init_rect(®ion, scanout->x, scanout->y, -- scanout->width, scanout->height); -- -- pixman_region_intersect(&finalregion, &flush_region, ®ion); -- -- extents = pixman_region_extents(&finalregion); -- size_t width = 
extents->x2 - extents->x1; -- size_t height = extents->y2 - extents->y1; -- -- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -- VhostUserGpuMsg vmsg = { -- .request = VHOST_USER_GPU_DMABUF_UPDATE, -- .size = sizeof(VhostUserGpuUpdate), -- .payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- } -- }; -- vg_send_msg(g, &vmsg, -1); -- vg_wait_ok(g); -- } else { -- size_t bpp = -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -- size_t size = width * height * bpp; -- -- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -- sizeof(VhostUserGpuUpdate) + size); -- VhostUserGpuMsg *msg = p; -- msg->request = VHOST_USER_GPU_UPDATE; -- msg->size = sizeof(VhostUserGpuUpdate) + size; -- msg->payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- }; -- pixman_image_t *i = -- pixman_image_create_bits(pixman_image_get_format(res->image), -- msg->payload.update.width, -- msg->payload.update.height, -- p + offsetof(VhostUserGpuMsg, -- payload.update.data), -- width * bpp); -- pixman_image_composite(PIXMAN_OP_SRC, -- res->image, NULL, i, -- extents->x1, extents->y1, -- 0, 0, 0, 0, -- width, height); -- pixman_image_unref(i); -- vg_send_msg(g, msg, -1); -- g_free(msg); -- } -- pixman_region_fini(®ion); -- pixman_region_fini(&finalregion); -- } -- pixman_region_fini(&flush_region); --} -- --static void --vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- switch (cmd->cmd_hdr.type) { -- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -- vg_get_display_info(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -- vg_resource_create_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_UNREF: -- vg_resource_unref(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -- vg_resource_flush(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -- 
vg_transfer_to_host_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_SET_SCANOUT: -- vg_set_scanout(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -- vg_resource_attach_backing(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -- vg_resource_detach_backing(vg, cmd); -- break; -- /* case VIRTIO_GPU_CMD_GET_EDID: */ -- /* break */ -- default: -- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- break; -- } -- if (!cmd->finished) { -- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : -- VIRTIO_GPU_RESP_OK_NODATA); -- } --} -- --static void --vg_handle_ctrl(VuDev *dev, int qidx) --{ -- VuGpu *vg = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- struct virtio_gpu_ctrl_command *cmd = NULL; -- size_t len; -- -- for (;;) { -- if (vg->wait_ok != 0) { -- return; -- } -- -- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -- if (!cmd) { -- break; -- } -- cmd->vq = vq; -- cmd->error = 0; -- cmd->finished = false; -- -- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -- if (len != sizeof(cmd->cmd_hdr)) { -- g_warning("%s: command size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cmd->cmd_hdr)); -- } -- -- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -- g_debug("%d %s\n", cmd->cmd_hdr.type, -- vg_cmd_to_string(cmd->cmd_hdr.type)); -- -- if (vg->virgl) { -- vg_virgl_process_cmd(vg, cmd); -- } else { -- vg_process_cmd(vg, cmd); -- } -- -- if (!cmd->finished) { -- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -- vg->inflight++; -- } else { -- g_free(cmd); -- } -- } --} -- --static void --update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) --{ -- struct virtio_gpu_simple_resource *res; -- -- res = virtio_gpu_find_resource(g, resource_id); -- g_return_if_fail(res != NULL); -- g_return_if_fail(pixman_image_get_width(res->image) == 64); -- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); -- g_return_if_fail( -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -- -- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); --} -- --static void --vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) --{ -- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -- -- g_debug("%s move:%d\n", G_STRFUNC, move); -- -- if (move) { -- VhostUserGpuMsg msg = { -- .request = cursor->resource_id ? -- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -- .size = sizeof(VhostUserGpuCursorPos), -- .payload.cursor_pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- } -- }; -- vg_send_msg(g, &msg, -1); -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_CURSOR_UPDATE, -- .size = sizeof(VhostUserGpuCursorUpdate), -- .payload.cursor_update = { -- .pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- }, -- .hot_x = cursor->hot_x, -- .hot_y = cursor->hot_y, -- } -- }; -- if (g->virgl) { -- vg_virgl_update_cursor_data(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } else { -- update_cursor_data_simple(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_handle_cursor(VuDev *dev, int qidx) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- VuVirtqElement *elem; -- size_t len; -- struct virtio_gpu_update_cursor cursor; -- -- for (;;) { -- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -- if (!elem) { -- break; -- } -- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -- -- len = iov_to_buf(elem->out_sg, elem->out_num, -- 0, &cursor, sizeof(cursor)); -- if (len != sizeof(cursor)) { -- g_warning("%s: cursor size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cursor)); -- } else { -- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -- vg_process_cursor_cmd(g, &cursor); -- } -- vu_queue_push(dev, vq, elem, 0); -- vu_queue_notify(dev, vq); -- g_free(elem); -- } --} -- --static void --vg_panic(VuDev *dev, const char *msg) --{ -- g_critical("%s\n", msg); -- exit(1); --} -- --static void --vg_queue_set_started(VuDev *dev, int qidx, bool started) --{ -- VuVirtq *vq = vu_get_queue(dev, qidx); -- -- g_debug("queue started %d:%d\n", qidx, started); -- -- switch (qidx) { -- case 0: -- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); -- break; -- case 1: -- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -- break; -- default: -- break; -- } --} -- --static void --set_gpu_protocol_features(VuGpu *g) --{ -- uint64_t u64; -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -- }; -- -- assert(g->wait_ok == 0); -- vg_send_msg(g, &msg, -1); -- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -- return; -- } -- -- msg = (VhostUserGpuMsg) { -- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -- .size = sizeof(uint64_t), -- .payload.u64 = 0 -- }; -- vg_send_msg(g, &msg, -1); --} -- --static int --vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- switch (msg->request) { -- case VHOST_USER_GPU_SET_SOCKET: { -- g_return_val_if_fail(msg->fd_num == 1, 1); -- g_return_val_if_fail(g->sock_fd == -1, 1); -- g->sock_fd = msg->fds[0]; -- set_gpu_protocol_features(g); -- return 1; -- } -- default: -- return 0; -- } -- -- return 0; --} -- --static uint64_t --vg_get_features(VuDev *dev) --{ -- uint64_t features = 0; -- -- if (opt_virgl) { -- features |= 1 << VIRTIO_GPU_F_VIRGL; -- } -- -- return features; --} -- --static void --vg_set_features(VuDev *dev, uint64_t features) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -- -- if (virgl && !g->virgl_inited) { -- if (!vg_virgl_init(g)) 
{ -- vg_panic(dev, "Failed to initialize virgl"); -- } -- g->virgl_inited = true; -- } -- -- g->virgl = virgl; --} -- --static int --vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -- -- if (opt_virgl) { -- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -- } -- -- memcpy(config, &g->virtio_config, len); -- -- return 0; --} -- --static int --vg_set_config(VuDev *dev, const uint8_t *data, -- uint32_t offset, uint32_t size, -- uint32_t flags) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -- -- if (config->events_clear) { -- g->virtio_config.events_read &= ~config->events_clear; -- } -- -- return 0; --} -- --static const VuDevIface vuiface = { -- .set_features = vg_set_features, -- .get_features = vg_get_features, -- .queue_set_started = vg_queue_set_started, -- .process_msg = vg_process_msg, -- .get_config = vg_get_config, -- .set_config = vg_set_config, --}; -- --static void --vg_destroy(VuGpu *g) --{ -- struct virtio_gpu_simple_resource *res, *tmp; -- -- vug_deinit(&g->dev); -- -- vg_sock_fd_close(g); -- -- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -- vg_resource_destroy(g, res); -- } -- -- vugbm_device_destroy(&g->gdev); --} -- --static GOptionEntry entries[] = { -- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -- "Print capabilities", NULL }, -- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -- "Use inherited fd socket", "FDNUM" }, -- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -- "Use UNIX socket path", "PATH" }, -- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -- "Specify DRM render node", "PATH" }, -- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -- "Turn virgl rendering on", NULL }, -- { NULL, } --}; -- --int --main(int argc, char *argv[]) --{ -- 
GOptionContext *context; -- GError *error = NULL; -- GMainLoop *loop = NULL; -- int fd; -- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -- -- QTAILQ_INIT(&g.reslist); -- QTAILQ_INIT(&g.fenceq); -- -- context = g_option_context_new("QEMU vhost-user-gpu"); -- g_option_context_add_main_entries(context, entries, NULL); -- if (!g_option_context_parse(context, &argc, &argv, &error)) { -- g_printerr("Option parsing failed: %s\n", error->message); -- exit(EXIT_FAILURE); -- } -- g_option_context_free(context); -- -- if (opt_print_caps) { -- g_print("{\n"); -- g_print(" \"type\": \"gpu\",\n"); -- g_print(" \"features\": [\n"); -- g_print(" \"render-node\",\n"); -- g_print(" \"virgl\"\n"); -- g_print(" ]\n"); -- g_print("}\n"); -- exit(EXIT_SUCCESS); -- } -- -- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -- if (opt_render_node && g.drm_rnode_fd == -1) { -- g_printerr("Failed to open DRM rendernode.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (g.drm_rnode_fd >= 0) { -- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -- g_warning("Failed to init DRM device, using fallback path"); -- } -- } -- -- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -- g_printerr("Please specify either --fd or --socket-path\n"); -- exit(EXIT_FAILURE); -- } -- -- if (opt_socket_path) { -- int lsock = unix_listen(opt_socket_path, &error_fatal); -- if (lsock < 0) { -- g_printerr("Failed to listen on %s.\n", opt_socket_path); -- exit(EXIT_FAILURE); -- } -- fd = accept(lsock, NULL, NULL); -- close(lsock); -- } else { -- fd = opt_fdnum; -- } -- if (fd == -1) { -- g_printerr("Invalid vhost-user socket.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -- g_printerr("Failed to initialize libvhost-user-glib.\n"); -- exit(EXIT_FAILURE); -- } -- -- loop = g_main_loop_new(NULL, FALSE); -- g_main_loop_run(loop); -- g_main_loop_unref(loop); -- -- vg_destroy(&g); -- if (g.drm_rnode_fd >= 0) { -- close(g.drm_rnode_fd); -- } -- 
-- return 0; --} -diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c -new file mode 100644 -index 0000000..b45d201 ---- /dev/null -+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c -@@ -0,0 +1,1191 @@ -+/* -+ * Virtio vhost-user GPU Device -+ * -+ * Copyright Red Hat, Inc. 2013-2018 -+ * -+ * Authors: -+ * Dave Airlie -+ * Gerd Hoffmann -+ * Marc-André Lureau -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#include "qemu/osdep.h" -+#include "qemu/drm.h" -+#include "qapi/error.h" -+#include "qemu/sockets.h" -+ -+#include -+#include -+ -+#include "vugpu.h" -+#include "hw/virtio/virtio-gpu-bswap.h" -+#include "hw/virtio/virtio-gpu-pixman.h" -+#include "virgl.h" -+#include "vugbm.h" -+ -+enum { -+ VHOST_USER_GPU_MAX_QUEUES = 2, -+}; -+ -+struct virtio_gpu_simple_resource { -+ uint32_t resource_id; -+ uint32_t width; -+ uint32_t height; -+ uint32_t format; -+ struct iovec *iov; -+ unsigned int iov_cnt; -+ uint32_t scanout_bitmask; -+ pixman_image_t *image; -+ struct vugbm_buffer buffer; -+ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; -+}; -+ -+static gboolean opt_print_caps; -+static int opt_fdnum = -1; -+static char *opt_socket_path; -+static char *opt_render_node; -+static gboolean opt_virgl; -+ -+static void vg_handle_ctrl(VuDev *dev, int qidx); -+ -+static const char * -+vg_cmd_to_string(int cmd) -+{ -+#define CMD(cmd) [cmd] = #cmd -+ static const char *vg_cmd_str[] = { -+ CMD(VIRTIO_GPU_UNDEFINED), -+ -+ /* 2d commands */ -+ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -+ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
-+ -+ /* 3d commands */ -+ CMD(VIRTIO_GPU_CMD_CTX_CREATE), -+ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -+ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -+ -+ /* cursor commands */ -+ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -+ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -+ }; -+#undef REQ -+ -+ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -+ return vg_cmd_str[cmd]; -+ } else { -+ return "unknown"; -+ } -+} -+ -+static int -+vg_sock_fd_read(int sock, void *buf, ssize_t buflen) -+{ -+ int ret; -+ -+ do { -+ ret = read(sock, buf, buflen); -+ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+static void -+vg_sock_fd_close(VuGpu *g) -+{ -+ if (g->sock_fd >= 0) { -+ close(g->sock_fd); -+ g->sock_fd = -1; -+ } -+} -+ -+static gboolean -+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) -+{ -+ VuGpu *g = user_data; -+ -+ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -+ return G_SOURCE_CONTINUE; -+ } -+ -+ /* resume */ -+ g->wait_ok = 0; -+ vg_handle_ctrl(&g->dev.parent, 0); -+ -+ return G_SOURCE_REMOVE; -+} -+ -+void -+vg_wait_ok(VuGpu *g) -+{ -+ assert(g->wait_ok == 0); -+ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -+ source_wait_cb, g); -+} -+ -+static int -+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) -+{ -+ ssize_t ret; -+ struct iovec iov = { -+ .iov_base = (void *)buf, -+ .iov_len = buflen, -+ }; -+ struct msghdr msg = { -+ .msg_iov = &iov, -+ .msg_iovlen = 1, -+ }; -+ union { -+ struct cmsghdr cmsghdr; -+ char control[CMSG_SPACE(sizeof(int))]; -+ } cmsgu; -+ struct cmsghdr *cmsg; -+ -+ if (fd != -1) { -+ msg.msg_control = cmsgu.control; -+ msg.msg_controllen = sizeof(cmsgu.control); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); -+ cmsg->cmsg_level = SOL_SOCKET; -+ cmsg->cmsg_type = SCM_RIGHTS; -+ -+ *((int *)CMSG_DATA(cmsg)) = fd; -+ } -+ -+ do { -+ ret = sendmsg(sock, &msg, 0); -+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+void -+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) -+{ -+ if (vg_sock_fd_write(vg->sock_fd, msg, -+ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -+ vg_sock_fd_close(vg); -+ } -+} -+ -+bool -+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, -+ gpointer payload) -+{ -+ uint32_t req, flags, size; -+ -+ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -+ goto err; -+ } -+ -+ g_return_val_if_fail(req == expect_req, false); -+ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -+ g_return_val_if_fail(size == expect_size, false); -+ -+ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -+ goto err; -+ } -+ -+ return true; -+ -+err: -+ vg_sock_fd_close(g); -+ return false; -+} -+ -+static struct virtio_gpu_simple_resource * -+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ QTAILQ_FOREACH(res, &g->reslist, next) { -+ if (res->resource_id == resource_id) { -+ return res; -+ } -+ } -+ return NULL; -+} -+ -+void -+vg_ctrl_response(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct virtio_gpu_ctrl_hdr *resp, -+ size_t resp_len) -+{ -+ size_t s; -+ -+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -+ resp->flags |= VIRTIO_GPU_FLAG_FENCE; -+ resp->fence_id = cmd->cmd_hdr.fence_id; -+ resp->ctx_id = cmd->cmd_hdr.ctx_id; -+ } -+ virtio_gpu_ctrl_hdr_bswap(resp); -+ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -+ if (s != resp_len) { -+ g_critical("%s: response size incorrect %zu vs %zu", -+ 
__func__, s, resp_len); -+ } -+ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -+ vu_queue_notify(&g->dev.parent, cmd->vq); -+ cmd->finished = true; -+} -+ -+void -+vg_ctrl_response_nodata(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ enum virtio_gpu_ctrl_type type) -+{ -+ struct virtio_gpu_ctrl_hdr resp = { -+ .type = type, -+ }; -+ -+ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); -+} -+ -+void -+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_resp_display_info dpy_info = { {} }; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -+ .size = 0, -+ }; -+ -+ assert(vg->wait_ok == 0); -+ -+ vg_send_msg(vg, &msg, -1); -+ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -+ return; -+ } -+ -+ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); -+} -+ -+static void -+vg_resource_create_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ pixman_format_code_t pformat; -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_create_2d c2d; -+ -+ VUGPU_FILL_CMD(c2d); -+ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -+ -+ if (c2d.resource_id == 0) { -+ g_critical("%s: resource id 0 is not allowed", __func__); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, c2d.resource_id); -+ if (res) { -+ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = g_new0(struct virtio_gpu_simple_resource, 1); -+ res->width = c2d.width; -+ res->height = c2d.height; -+ res->format = c2d.format; -+ res->resource_id = c2d.resource_id; -+ -+ pformat = virtio_gpu_get_pixman_format(c2d.format); -+ if (!pformat) { -+ g_critical("%s: host couldn't handle guest format %d", -+ __func__, c2d.format); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -+ res->image = pixman_image_create_bits(pformat, -+ c2d.width, -+ c2d.height, -+ (uint32_t *)res->buffer.mmap, -+ res->buffer.stride); -+ if (!res->image) { -+ g_critical("%s: resource creation failed %d %d %d", -+ __func__, c2d.resource_id, c2d.width, c2d.height); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ QTAILQ_INSERT_HEAD(&g->reslist, res, next); -+} -+ -+static void -+vg_disable_scanout(VuGpu *g, int scanout_id) -+{ -+ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; -+ struct virtio_gpu_simple_resource *res; -+ -+ if (scanout->resource_id == 0) { -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (res) { -+ res->scanout_bitmask &= ~(1 << scanout_id); -+ } -+ -+ scanout->width = 0; -+ scanout->height = 0; -+ -+ if (g->sock_fd >= 0) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout.scanout_id = scanout_id, -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_destroy(VuGpu *g, -+ struct virtio_gpu_simple_resource *res) -+{ -+ int i; -+ -+ if (res->scanout_bitmask) { -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ if (res->scanout_bitmask & (1 << i)) { -+ vg_disable_scanout(g, i); -+ } -+ } -+ } -+ -+ vugbm_buffer_destroy(&res->buffer); -+ pixman_image_unref(res->image); -+ QTAILQ_REMOVE(&g->reslist, res, next); -+ g_free(res); -+} -+ -+static void -+vg_resource_unref(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_unref unref; -+ -+ VUGPU_FILL_CMD(unref); -+ virtio_gpu_bswap_32(&unref, sizeof(unref)); -+ -+ res = virtio_gpu_find_resource(g, unref.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, unref.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ 
return; -+ } -+ vg_resource_destroy(g, res); -+} -+ -+int -+vg_create_mapping_iov(VuGpu *g, -+ struct virtio_gpu_resource_attach_backing *ab, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct iovec **iov) -+{ -+ struct virtio_gpu_mem_entry *ents; -+ size_t esize, s; -+ int i; -+ -+ if (ab->nr_entries > 16384) { -+ g_critical("%s: nr_entries is too big (%d > 16384)", -+ __func__, ab->nr_entries); -+ return -1; -+ } -+ -+ esize = sizeof(*ents) * ab->nr_entries; -+ ents = g_malloc(esize); -+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ sizeof(*ab), ents, esize); -+ if (s != esize) { -+ g_critical("%s: command data size incorrect %zu vs %zu", -+ __func__, s, esize); -+ g_free(ents); -+ return -1; -+ } -+ -+ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -+ for (i = 0; i < ab->nr_entries; i++) { -+ uint64_t len = ents[i].length; -+ (*iov)[i].iov_len = ents[i].length; -+ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -+ if (!(*iov)[i].iov_base || len != ents[i].length) { -+ g_critical("%s: resource %d element %d", -+ __func__, ab->resource_id, i); -+ g_free(*iov); -+ g_free(ents); -+ *iov = NULL; -+ return -1; -+ } -+ } -+ g_free(ents); -+ return 0; -+} -+ -+static void -+vg_resource_attach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_attach_backing ab; -+ int ret; -+ -+ VUGPU_FILL_CMD(ab); -+ virtio_gpu_bswap_32(&ab, sizeof(ab)); -+ -+ res = virtio_gpu_find_resource(g, ab.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ab.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -+ if (ret != 0) { -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ return; -+ } -+ -+ res->iov_cnt = ab.nr_entries; -+} -+ -+static void -+vg_resource_detach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct 
virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_detach_backing detach; -+ -+ VUGPU_FILL_CMD(detach); -+ virtio_gpu_bswap_32(&detach, sizeof(detach)); -+ -+ res = virtio_gpu_find_resource(g, detach.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, detach.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ g_free(res->iov); -+ res->iov = NULL; -+ res->iov_cnt = 0; -+} -+ -+static void -+vg_transfer_to_host_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ int h; -+ uint32_t src_offset, dst_offset, stride; -+ int bpp; -+ pixman_format_code_t format; -+ struct virtio_gpu_transfer_to_host_2d t2d; -+ -+ VUGPU_FILL_CMD(t2d); -+ virtio_gpu_t2d_bswap(&t2d); -+ -+ res = virtio_gpu_find_resource(g, t2d.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, t2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (t2d.r.x > res->width || -+ t2d.r.y > res->height || -+ t2d.r.width > res->width || -+ t2d.r.height > res->height || -+ t2d.r.x + t2d.r.width > res->width || -+ t2d.r.y + t2d.r.height > res->height) { -+ g_critical("%s: transfer bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d", -+ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -+ t2d.r.width, t2d.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ format = pixman_image_get_format(res->image); -+ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -+ stride = pixman_image_get_stride(res->image); -+ -+ if (t2d.offset || t2d.r.x || t2d.r.y || -+ t2d.r.width != pixman_image_get_width(res->image)) { -+ void *img_data = pixman_image_get_data(res->image); -+ for (h = 0; h < t2d.r.height; h++) { -+ src_offset = t2d.offset + stride * h; -+ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -+ 
-+ iov_to_buf(res->iov, res->iov_cnt, src_offset, -+ img_data -+ + dst_offset, t2d.r.width * bpp); -+ } -+ } else { -+ iov_to_buf(res->iov, res->iov_cnt, 0, -+ pixman_image_get_data(res->image), -+ pixman_image_get_stride(res->image) -+ * pixman_image_get_height(res->image)); -+ } -+} -+ -+static void -+vg_set_scanout(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res, *ores; -+ struct virtio_gpu_scanout *scanout; -+ struct virtio_gpu_set_scanout ss; -+ int fd; -+ -+ VUGPU_FILL_CMD(ss); -+ virtio_gpu_bswap_32(&ss, sizeof(ss)); -+ -+ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -+ g_critical("%s: illegal scanout id specified %d", -+ __func__, ss.scanout_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -+ return; -+ } -+ -+ if (ss.resource_id == 0) { -+ vg_disable_scanout(g, ss.scanout_id); -+ return; -+ } -+ -+ /* create a surface for this scanout */ -+ res = virtio_gpu_find_resource(g, ss.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ss.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (ss.r.x > res->width || -+ ss.r.y > res->height || -+ ss.r.width > res->width || -+ ss.r.height > res->height || -+ ss.r.x + ss.r.width > res->width || -+ ss.r.y + ss.r.height > res->height) { -+ g_critical("%s: illegal scanout %d bounds for" -+ " resource %d, (%d,%d)+%d,%d vs %d %d", -+ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -+ ss.r.width, ss.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ scanout = &g->scanout[ss.scanout_id]; -+ -+ ores = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (ores) { -+ ores->scanout_bitmask &= ~(1 << ss.scanout_id); -+ } -+ -+ res->scanout_bitmask |= (1 << ss.scanout_id); -+ scanout->resource_id = ss.resource_id; -+ scanout->x = ss.r.x; -+ scanout->y = ss.r.y; -+ scanout->width = ss.r.width; -+ scanout->height = 
ss.r.height; -+ -+ struct vugbm_buffer *buffer = &res->buffer; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_DMABUF_SCANOUT, -+ .size = sizeof(VhostUserGpuDMABUFScanout), -+ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -+ .scanout_id = ss.scanout_id, -+ .x = ss.r.x, -+ .y = ss.r.y, -+ .width = ss.r.width, -+ .height = ss.r.height, -+ .fd_width = buffer->width, -+ .fd_height = buffer->height, -+ .fd_stride = buffer->stride, -+ .fd_drm_fourcc = buffer->format -+ } -+ }; -+ -+ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -+ vg_send_msg(g, &msg, fd); -+ close(fd); -+ } -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout = (VhostUserGpuScanout) { -+ .scanout_id = ss.scanout_id, -+ .width = scanout->width, -+ .height = scanout->height -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_flush(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_flush rf; -+ pixman_region16_t flush_region; -+ int i; -+ -+ VUGPU_FILL_CMD(rf); -+ virtio_gpu_bswap_32(&rf, sizeof(rf)); -+ -+ res = virtio_gpu_find_resource(g, rf.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d\n", -+ __func__, rf.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (rf.r.x > res->width || -+ rf.r.y > res->height || -+ rf.r.width > res->width || -+ rf.r.height > res->height || -+ rf.r.x + rf.r.width > res->width || -+ rf.r.y + rf.r.height > res->height) { -+ g_critical("%s: flush bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d\n", -+ __func__, rf.resource_id, rf.r.x, rf.r.y, -+ rf.r.width, rf.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ pixman_region_init_rect(&flush_region, -+ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ struct virtio_gpu_scanout *scanout; -+ pixman_region16_t region, finalregion; -+ pixman_box16_t *extents; -+ -+ if (!(res->scanout_bitmask & (1 << i))) { -+ continue; -+ } -+ scanout = &g->scanout[i]; -+ -+ pixman_region_init(&finalregion); -+ pixman_region_init_rect(®ion, scanout->x, scanout->y, -+ scanout->width, scanout->height); -+ -+ pixman_region_intersect(&finalregion, &flush_region, ®ion); -+ -+ extents = pixman_region_extents(&finalregion); -+ size_t width = extents->x2 - extents->x1; -+ size_t height = extents->y2 - extents->y1; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -+ VhostUserGpuMsg vmsg = { -+ .request = VHOST_USER_GPU_DMABUF_UPDATE, -+ .size = sizeof(VhostUserGpuUpdate), -+ .payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ } -+ }; -+ vg_send_msg(g, &vmsg, -1); -+ vg_wait_ok(g); -+ } else { -+ size_t bpp = -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -+ size_t size = width * height * bpp; -+ -+ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -+ sizeof(VhostUserGpuUpdate) + size); -+ VhostUserGpuMsg *msg = p; -+ msg->request = VHOST_USER_GPU_UPDATE; -+ msg->size = sizeof(VhostUserGpuUpdate) + size; -+ msg->payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ }; -+ pixman_image_t *i = -+ pixman_image_create_bits(pixman_image_get_format(res->image), -+ msg->payload.update.width, -+ msg->payload.update.height, -+ p + offsetof(VhostUserGpuMsg, -+ payload.update.data), -+ width * bpp); -+ pixman_image_composite(PIXMAN_OP_SRC, -+ res->image, NULL, i, -+ extents->x1, extents->y1, -+ 0, 0, 0, 0, -+ width, height); -+ pixman_image_unref(i); -+ vg_send_msg(g, msg, -1); -+ g_free(msg); -+ } -+ pixman_region_fini(®ion); -+ pixman_region_fini(&finalregion); -+ 
} -+ pixman_region_fini(&flush_region); -+} -+ -+static void -+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ switch (cmd->cmd_hdr.type) { -+ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -+ vg_get_display_info(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -+ vg_resource_create_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_UNREF: -+ vg_resource_unref(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -+ vg_resource_flush(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -+ vg_transfer_to_host_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_SET_SCANOUT: -+ vg_set_scanout(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -+ vg_resource_attach_backing(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -+ vg_resource_detach_backing(vg, cmd); -+ break; -+ /* case VIRTIO_GPU_CMD_GET_EDID: */ -+ /* break */ -+ default: -+ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ break; -+ } -+ if (!cmd->finished) { -+ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : -+ VIRTIO_GPU_RESP_OK_NODATA); -+ } -+} -+ -+static void -+vg_handle_ctrl(VuDev *dev, int qidx) -+{ -+ VuGpu *vg = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ struct virtio_gpu_ctrl_command *cmd = NULL; -+ size_t len; -+ -+ for (;;) { -+ if (vg->wait_ok != 0) { -+ return; -+ } -+ -+ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -+ if (!cmd) { -+ break; -+ } -+ cmd->vq = vq; -+ cmd->error = 0; -+ cmd->finished = false; -+ -+ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -+ if (len != sizeof(cmd->cmd_hdr)) { -+ g_warning("%s: command size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cmd->cmd_hdr)); -+ } -+ -+ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -+ g_debug("%d %s\n", cmd->cmd_hdr.type, -+ vg_cmd_to_string(cmd->cmd_hdr.type)); -+ -+ if (vg->virgl) { -+ vg_virgl_process_cmd(vg, cmd); -+ } else { -+ vg_process_cmd(vg, cmd); -+ } -+ -+ if (!cmd->finished) { -+ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -+ vg->inflight++; -+ } else { -+ g_free(cmd); -+ } -+ } -+} -+ -+static void -+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ res = virtio_gpu_find_resource(g, resource_id); -+ g_return_if_fail(res != NULL); -+ g_return_if_fail(pixman_image_get_width(res->image) == 64); -+ g_return_if_fail(pixman_image_get_height(res->image) == 64); -+ g_return_if_fail( -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -+ -+ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); -+} -+ -+static void -+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) -+{ -+ bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -+ -+ g_debug("%s move:%d\n", G_STRFUNC, move); -+ -+ if (move) { -+ VhostUserGpuMsg msg = { -+ .request = cursor->resource_id ? 
-+ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -+ .size = sizeof(VhostUserGpuCursorPos), -+ .payload.cursor_pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_CURSOR_UPDATE, -+ .size = sizeof(VhostUserGpuCursorUpdate), -+ .payload.cursor_update = { -+ .pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ }, -+ .hot_x = cursor->hot_x, -+ .hot_y = cursor->hot_y, -+ } -+ }; -+ if (g->virgl) { -+ vg_virgl_update_cursor_data(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } else { -+ update_cursor_data_simple(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_handle_cursor(VuDev *dev, int qidx) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ VuVirtqElement *elem; -+ size_t len; -+ struct virtio_gpu_update_cursor cursor; -+ -+ for (;;) { -+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -+ -+ len = iov_to_buf(elem->out_sg, elem->out_num, -+ 0, &cursor, sizeof(cursor)); -+ if (len != sizeof(cursor)) { -+ g_warning("%s: cursor size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cursor)); -+ } else { -+ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -+ vg_process_cursor_cmd(g, &cursor); -+ } -+ vu_queue_push(dev, vq, elem, 0); -+ vu_queue_notify(dev, vq); -+ g_free(elem); -+ } -+} -+ -+static void -+vg_panic(VuDev *dev, const char *msg) -+{ -+ g_critical("%s\n", msg); -+ exit(1); -+} -+ -+static void -+vg_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ -+ g_debug("queue started %d:%d\n", qidx, started); -+ -+ switch (qidx) { -+ case 0: -+ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); -+ break; -+ case 1: -+ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -+ break; -+ default: -+ break; -+ } -+} -+ -+static void -+set_gpu_protocol_features(VuGpu *g) -+{ -+ uint64_t u64; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -+ }; -+ -+ assert(g->wait_ok == 0); -+ vg_send_msg(g, &msg, -1); -+ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -+ return; -+ } -+ -+ msg = (VhostUserGpuMsg) { -+ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -+ .size = sizeof(uint64_t), -+ .payload.u64 = 0 -+ }; -+ vg_send_msg(g, &msg, -1); -+} -+ -+static int -+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ switch (msg->request) { -+ case VHOST_USER_GPU_SET_SOCKET: { -+ g_return_val_if_fail(msg->fd_num == 1, 1); -+ g_return_val_if_fail(g->sock_fd == -1, 1); -+ g->sock_fd = msg->fds[0]; -+ set_gpu_protocol_features(g); -+ return 1; -+ } -+ default: -+ return 0; -+ } -+ -+ return 0; -+} -+ -+static uint64_t -+vg_get_features(VuDev *dev) -+{ -+ uint64_t features = 0; -+ -+ if (opt_virgl) { -+ features |= 1 << VIRTIO_GPU_F_VIRGL; -+ } -+ -+ return features; -+} -+ -+static void -+vg_set_features(VuDev *dev, uint64_t features) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -+ -+ if (virgl && !g->virgl_inited) { -+ if (!vg_virgl_init(g)) { -+ vg_panic(dev, "Failed to initialize virgl"); -+ } -+ g->virgl_inited = true; -+ } -+ -+ g->virgl = virgl; -+} -+ -+static int -+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -+ -+ if (opt_virgl) { -+ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -+ } -+ -+ memcpy(config, &g->virtio_config, len); -+ -+ return 0; -+} -+ -+static int -+vg_set_config(VuDev *dev, const 
uint8_t *data, -+ uint32_t offset, uint32_t size, -+ uint32_t flags) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -+ -+ if (config->events_clear) { -+ g->virtio_config.events_read &= ~config->events_clear; -+ } -+ -+ return 0; -+} -+ -+static const VuDevIface vuiface = { -+ .set_features = vg_set_features, -+ .get_features = vg_get_features, -+ .queue_set_started = vg_queue_set_started, -+ .process_msg = vg_process_msg, -+ .get_config = vg_get_config, -+ .set_config = vg_set_config, -+}; -+ -+static void -+vg_destroy(VuGpu *g) -+{ -+ struct virtio_gpu_simple_resource *res, *tmp; -+ -+ vug_deinit(&g->dev); -+ -+ vg_sock_fd_close(g); -+ -+ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -+ vg_resource_destroy(g, res); -+ } -+ -+ vugbm_device_destroy(&g->gdev); -+} -+ -+static GOptionEntry entries[] = { -+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -+ "Print capabilities", NULL }, -+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -+ "Use inherited fd socket", "FDNUM" }, -+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -+ "Use UNIX socket path", "PATH" }, -+ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -+ "Specify DRM render node", "PATH" }, -+ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -+ "Turn virgl rendering on", NULL }, -+ { NULL, } -+}; -+ -+int -+main(int argc, char *argv[]) -+{ -+ GOptionContext *context; -+ GError *error = NULL; -+ GMainLoop *loop = NULL; -+ int fd; -+ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -+ -+ QTAILQ_INIT(&g.reslist); -+ QTAILQ_INIT(&g.fenceq); -+ -+ context = g_option_context_new("QEMU vhost-user-gpu"); -+ g_option_context_add_main_entries(context, entries, NULL); -+ if (!g_option_context_parse(context, &argc, &argv, &error)) { -+ g_printerr("Option parsing failed: %s\n", error->message); -+ exit(EXIT_FAILURE); -+ } -+ g_option_context_free(context); -+ -+ if (opt_print_caps) 
{ -+ g_print("{\n"); -+ g_print(" \"type\": \"gpu\",\n"); -+ g_print(" \"features\": [\n"); -+ g_print(" \"render-node\",\n"); -+ g_print(" \"virgl\"\n"); -+ g_print(" ]\n"); -+ g_print("}\n"); -+ exit(EXIT_SUCCESS); -+ } -+ -+ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -+ if (opt_render_node && g.drm_rnode_fd == -1) { -+ g_printerr("Failed to open DRM rendernode.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (g.drm_rnode_fd >= 0) { -+ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -+ g_warning("Failed to init DRM device, using fallback path"); -+ } -+ } -+ -+ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -+ g_printerr("Please specify either --fd or --socket-path\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (opt_socket_path) { -+ int lsock = unix_listen(opt_socket_path, &error_fatal); -+ if (lsock < 0) { -+ g_printerr("Failed to listen on %s.\n", opt_socket_path); -+ exit(EXIT_FAILURE); -+ } -+ fd = accept(lsock, NULL, NULL); -+ close(lsock); -+ } else { -+ fd = opt_fdnum; -+ } -+ if (fd == -1) { -+ g_printerr("Invalid vhost-user socket.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -+ g_printerr("Failed to initialize libvhost-user-glib.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ loop = g_main_loop_new(NULL, FALSE); -+ g_main_loop_run(loop); -+ g_main_loop_unref(loop); -+ -+ vg_destroy(&g); -+ if (g.drm_rnode_fd >= 0) { -+ close(g.drm_rnode_fd); -+ } -+ -+ return 0; -+} --- -1.8.3.1 - diff --git a/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch deleted file mode 100644 index 4212f1c..0000000 --- a/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:44 +0100 -Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-70-dgilbert@redhat.com> -Patchwork-id: 93523 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In future patches we'll be performing commands on the slave-fd driven -by commands on queues, since those queues will be driven by individual -threads we need to make sure they don't attempt to use the slave-fd -for multiple commands in parallel. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 23 insertions(+), 4 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index ec27b78..63e4106 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) - return vu_message_write(dev, conn_fd, vmsg); - } - -+/* -+ * Processes a reply on the slave channel. -+ * Entered with slave_mutex held and releases it before exit. -+ * Returns true on success. 
-+ */ - static bool - vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) - { - VhostUserMsg msg_reply; -+ bool result = false; - - if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { -- return true; -+ result = true; -+ goto out; - } - - if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { -- return false; -+ goto out; - } - - if (msg_reply.request != vmsg->request) { - DPRINT("Received unexpected msg type. Expected %d received %d", - vmsg->request, msg_reply.request); -- return false; -+ goto out; - } - -- return msg_reply.payload.u64 == 0; -+ result = msg_reply.payload.u64 == 0; -+ -+out: -+ pthread_mutex_unlock(&dev->slave_mutex); -+ return result; - } - - /* Kick the log_call_fd if required. */ -@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - return false; - } - -+ pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { -+ pthread_mutex_unlock(&dev->slave_mutex); - return false; - } - -+ /* Also unlocks the slave_mutex */ - return vu_process_message_reply(dev, &vmsg); - } - -@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) - close(dev->slave_fd); - dev->slave_fd = -1; - } -+ pthread_mutex_destroy(&dev->slave_mutex); - - if (dev->sock != -1) { - close(dev->sock); -@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, - dev->remove_watch = remove_watch; - dev->iface = iface; - dev->log_call_fd = -1; -+ pthread_mutex_init(&dev->slave_mutex, NULL); - dev->slave_fd = -1; - dev->max_queues = max_queues; - -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 46b6007..1844b6f 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include "standard-headers/linux/virtio_ring.h" - - /* Based on qemu/hw/virtio/vhost-user.c */ -@@ -355,6 +356,8 @@ struct VuDev { - VuVirtq *vq; - VuDevInflightInfo inflight_info; - int log_call_fd; -+ /* Must be 
held while using slave_fd */ -+ pthread_mutex_t slave_mutex; - int slave_fd; - uint64_t log_size; - uint8_t *log_table; --- -1.8.3.1 - diff --git a/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch deleted file mode 100644 index a6177c6..0000000 --- a/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:58 +0000 -Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment - clearer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-3-drjones@redhat.com> -Patchwork-id: 93772 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Philippe Mathieu-Daudé -Date: Fri, 07 Feb 2020 14:04:28 +0000 - - docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer - - The bold text sounds like 'knock knock'. Only bolding the - second 'not' makes it easier to read. - - Fixes: dea101a1ae - Signed-off-by: Philippe Mathieu-Daudé - Reviewed-by: Andrew Jones - Message-id: 20200206225148.23923-1-philmd@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) -Signed-off-by: Danilo C. L. de Paula ---- - docs/arm-cpu-features.rst | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 45d1eb6..48d5054 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. 
- - kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This - means that by default the virtual time -- adjustment is enabled (vtime is *not not* -+ adjustment is enabled (vtime is not *not* - adjusted). - - When virtual time adjustment is enabled each --- -1.8.3.1 - diff --git a/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch deleted file mode 100644 index 5d44708..0000000 --- a/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 13 Mar 2020 17:17:08 +0000 -Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200313171708.242774-1-dgilbert@redhat.com> -Patchwork-id: 94292 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip -Bugzilla: 1809380 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Paolo Bonzini - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 -branch: rhel-av-8.2.0 -upstream: Posted and with review-by, not merged yet - -Commit 355477f8c73e9 skips rom reset when we're an incoming migration -so as not to overwrite shared ram in the ignore-shared migration -optimisation. -However, it's got an unexpected side effect that because it skips -freeing the ROM data, when rom_reset gets called later on, after -migration (e.g. during a reboot), the ROM does get reset to the original -file contents. Because of seabios/x86's weird reboot process -this confuses a reboot into hanging after a migration. 
- -Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") -https://bugzilla.redhat.com/show_bug.cgi?id=1809380 - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/loader.c | 25 ++++++++++++++++--------- - 1 file changed, 16 insertions(+), 9 deletions(-) - -diff --git a/hw/core/loader.c b/hw/core/loader.c -index 5099f27..375b29b 100644 ---- a/hw/core/loader.c -+++ b/hw/core/loader.c -@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) - { - Rom *rom; - -- /* -- * We don't need to fill in the RAM with ROM data because we'll fill -- * the data in during the next incoming migration in all cases. Note -- * that some of those RAMs can actually be modified by the guest on ARM -- * so this is probably the only right thing to do here. -- */ -- if (runstate_check(RUN_STATE_INMIGRATE)) -- return; -- - QTAILQ_FOREACH(rom, &roms, next) { - if (rom->fw_file) { - continue; - } -+ /* -+ * We don't need to fill in the RAM with ROM data because we'll fill -+ * the data in during the next incoming migration in all cases. Note -+ * that some of those RAMs can actually be modified by the guest. -+ */ -+ if (runstate_check(RUN_STATE_INMIGRATE)) { -+ if (rom->data && rom->isrom) { -+ /* -+ * Free it so that a rom_reset after migration doesn't -+ * overwrite a potentially modified 'rom'. 
-+ */ -+ rom_free_data(rom); -+ } -+ continue; -+ } -+ - if (rom->data == NULL) { - continue; - } --- -1.8.3.1 - diff --git a/kvm-file-posix-Drop-hdev_co_create_opts.patch b/kvm-file-posix-Drop-hdev_co_create_opts.patch deleted file mode 100644 index ea2edbd..0000000 --- a/kvm-file-posix-Drop-hdev_co_create_opts.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:44 +0000 -Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> -Patchwork-id: 94225 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. - -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-4-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 67 ------------------------------------------------------ - 1 file changed, 67 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 1b805bd..fd29372 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); - } - --static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int fd; -- int ret = 0; -- struct stat stat_buf; -- int64_t total_size = 0; -- bool has_prefix; -- -- /* This function is used by both protocol block drivers and therefore either -- * of these prefixes may be given. 
-- * The return value has to be stored somewhere, otherwise this is an error -- * due to -Werror=unused-value. */ -- has_prefix = -- strstart(filename, "host_device:", &filename) || -- strstart(filename, "host_cdrom:" , &filename); -- -- (void)has_prefix; -- -- ret = raw_normalize_devicepath(&filename, errp); -- if (ret < 0) { -- return ret; -- } -- -- /* Read out options */ -- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- -- fd = qemu_open(filename, O_WRONLY | O_BINARY); -- if (fd < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not open device"); -- return ret; -- } -- -- if (fstat(fd, &stat_buf) < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not stat device"); -- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { -- error_setg(errp, -- "The given file is neither a block nor a character device"); -- ret = -ENODEV; -- } else if (lseek(fd, 0, SEEK_END) < total_size) { -- error_setg(errp, "Device is too small"); -- ret = -ENOSPC; -- } -- -- if (!ret && total_size) { -- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; -- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); -- if (lseek(fd, 0, SEEK_SET) == -1) { -- ret = -errno; -- } else { -- ret = qemu_write_full(fd, buf, zero_size); -- ret = ret == zero_size ? 
0 : -errno; -- } -- } -- qemu_close(fd); -- return ret; --} -- - static BlockDriver bdrv_host_device = { - .format_name = "host_device", - .protocol_name = "host_device", -@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, --- -1.8.3.1 - diff --git a/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch deleted file mode 100644 index f01dec2..0000000 --- a/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +++ /dev/null @@ -1,100 +0,0 @@ -From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:02 +0000 -Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-5-kwolf@redhat.com> -Patchwork-id: 93750 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command -Bugzilla: 1781637 -RH-Acked-by: Sergio 
Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -In order to issue requests on an existing BlockBackend with the -'qemu-io' HMP command, allow specifying the BlockBackend not only with a -BlockBackend name, but also with a qdev ID/QOM path for a device that -owns the (possibly anonymous) BlockBackend. - -Because qdev names could be conflicting with BlockBackend and node -names, introduce a -d option to explicitly address a device. If the -option is not given, a BlockBackend or a node is addressed. - -Signed-off-by: Kevin Wolf -(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - hmp-commands.hx | 8 +++++--- - monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- - 2 files changed, 23 insertions(+), 13 deletions(-) - -diff --git a/hmp-commands.hx b/hmp-commands.hx -index cfcc044..dc23185 100644 ---- a/hmp-commands.hx -+++ b/hmp-commands.hx -@@ -1875,9 +1875,11 @@ ETEXI - - { - .name = "qemu-io", -- .args_type = "device:B,command:s", -- .params = "[device] \"[command]\"", -- .help = "run a qemu-io command on a block device", -+ .args_type = "qdev:-d,device:B,command:s", -+ .params = "[-d] [device] \"[command]\"", -+ .help = "run a qemu-io command on a block device\n\t\t\t" -+ "-d: [device] is a device ID rather than a " -+ "drive ID or node name", - .cmd = hmp_qemu_io, - }, - -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index b2551c1..5f8941d 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - { - BlockBackend *blk; - BlockBackend *local_blk = NULL; -+ bool qdev = qdict_get_try_bool(qdict, "qdev", false); - const char* device = qdict_get_str(qdict, "device"); - const char* command = qdict_get_str(qdict, "command"); - Error *err = NULL; - int ret; - -- blk = blk_by_name(device); -- if (!blk) { -- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -- if (bs) { 
-- blk = local_blk = blk_new(bdrv_get_aio_context(bs), -- 0, BLK_PERM_ALL); -- ret = blk_insert_bs(blk, bs, &err); -- if (ret < 0) { -+ if (qdev) { -+ blk = blk_by_qdev_id(device, &err); -+ if (!blk) { -+ goto fail; -+ } -+ } else { -+ blk = blk_by_name(device); -+ if (!blk) { -+ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -+ if (bs) { -+ blk = local_blk = blk_new(bdrv_get_aio_context(bs), -+ 0, BLK_PERM_ALL); -+ ret = blk_insert_bs(blk, bs, &err); -+ if (ret < 0) { -+ goto fail; -+ } -+ } else { - goto fail; - } -- } else { -- goto fail; - } - } - --- -1.8.3.1 - diff --git a/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch deleted file mode 100644 index 0f0f126..0000000 --- a/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +++ /dev/null @@ -1,262 +0,0 @@ -From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 -From: "Daniel P. Berrange" -Date: Mon, 20 Jan 2020 16:13:08 +0000 -Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows - driver support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrange -Message-id: <20200120161308.584989-2-berrange@redhat.com> -Patchwork-id: 93422 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support -Bugzilla: 1782529 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -For Windows driver support, we have to follow this doc in order to -enable Windows to automatically determine the right drivers to install -for a given guest / host combination: - - https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer - -Out of the choices available, it was decided that the Windows drivers -will be written to expect use of the scheme documented as "HardwareID-6" -against Windows 10. 
This uses SMBIOS System (Type 1) and Base Board -(Type 2) tables and will match on - - System Manufacturer = Red Hat - System SKU Number = 8.2.0 - Baseboard Manufacturer = Red Hat - Baseboard Product = RHEL-AV - -The new SMBIOS fields will be tied to machine type and only reported for -pc-q35-8.2.0 machine and later. - -The old SMBIOS fields, previously reported by all machines were: - - System Manufacturer: Red Hat - System Product Name: KVM - System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - System Family: Red Hat Enterprise Linux - Baseboard Manufacturer: Red Hat - Baseboard Product Name: KVM - Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Chassis Manufacturer: Red Hat - Chassis Product Name: KVM - Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Processor Manufacturer: Red Hat - Processor Product Name: KVM - Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - -This information will continue to be reported for all machines, except -where it conflicts with the requirement of the new SMBIOS data. IOW, -the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 -machine types and later. - -Management applications MUST NEVER override the 4 new SMBIOS fields that -are used for Windows driver matching, with differing values. Aside from -this, they are free to override any other field, including those from -the old SMBIOS field data. - -In particular if a management application wants to report its own -product name and version, it is recommended to use "System product" -and "System version" as identifying fields, as these avoid a clash with -the new SMBIOS fields used for Windows drivers. - -Note that until now the Baseboard (type 2) table has only been generated -by QEMU if explicitly asked for on the CLI. This patch makes it always -present for new machine types. - -Signed-off-by: Daniel P. Berrangé -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 2 +- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 8 ++++++++ - hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- - include/hw/firmware/smbios.h | 5 ++++- - include/hw/i386/pc.h | 3 +++ - 6 files changed, 60 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d30d38c..2dcf6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) - - smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_30); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); - - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index bd7fdb9..2ac94d5 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 7531d8e..e975643 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) - - static void pc_q35_machine_rhel820_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.2.0"; - } - - DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", 
pc_q35_init_rhel820, -@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) - - static void pc_q35_machine_rhel810_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel820_options(m); - m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; -+ pcmc->smbios_stream_product = NULL; -+ pcmc->smbios_stream_version = NULL; - compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); - compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); - } -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index e6e9355..d65c149 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -57,6 +57,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ - -+/* Set to true for modern Windows 10 HardwareID-6 compat */ -+static bool smbios_type2_required; -+ - - uint8_t *smbios_tables; - size_t smbios_tables_len; -@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) - - static void smbios_build_type_2_table(void) - { -- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ -+ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); - - SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); - SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) - - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) - { - smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } - -+ /* -+ * If @stream_product & @stream_version are non-NULL, 
then -+ * we're following rules for new Windows driver support. -+ * The data we have to report is defined in this doc: -+ * -+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer -+ * -+ * The Windows drivers are written to expect use of the -+ * scheme documented as "HardwareID-6" against Windows 10, -+ * which uses SMBIOS System (Type 1) and Base Board (Type 2) -+ * tables and will match on -+ * -+ * System Manufacturer = Red Hat (@manufacturer) -+ * System SKU Number = 8.2.0 (@stream_version) -+ * Baseboard Manufacturer = Red Hat (@manufacturer) -+ * Baseboard Product = RHEL-AV (@stream_product) -+ * -+ * NB, SKU must be changed with each RHEL-AV release -+ * -+ * Other fields can be freely used by applications using -+ * QEMU. For example apps can use the "System product" -+ * and "System version" to identify themselves. -+ * -+ * We get 'System Manufacturer' and 'Baseboard Manufacturer' -+ */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); - SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); -+ if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); -+ } - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); -- SMBIOS_SET_DEFAULT(type2.product, product); -+ if (stream_product != NULL) { -+ SMBIOS_SET_DEFAULT(type2.product, stream_product); -+ smbios_type2_required = true; -+ } else { -+ SMBIOS_SET_DEFAULT(type2.product, product); -+ } - SMBIOS_SET_DEFAULT(type2.version, version); - SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type3.version, version); -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced..67e38a1 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, 
uint32_t features); - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 2e362c8..b9f29ba 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -109,6 +109,9 @@ typedef struct PCMachineClass { - bool smbios_defaults; - bool smbios_legacy_mode; - bool smbios_uuid_encoded; -+ /* New fields needed for Windows HardwareID-6 matching */ -+ const char *smbios_stream_product; -+ const char *smbios_stream_version; - - /* RAM / address space compat: */ - bool gigabyte_align; --- -1.8.3.1 - diff --git a/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch deleted file mode 100644 index 5d62ace..0000000 --- a/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Mon, 23 Dec 2019 21:10:31 +0000 -Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model - -RH-Author: Eduardo Habkost -Message-id: <20191223211031.26503-1-ehabkost@redhat.com> -Patchwork-id: 93213 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model -Bugzilla: 1741345 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 -BRANCH: rhel-av-8.2.0 -Upstream: not applicable -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 - -We don't provide rhel6 machine types anymore, so we don't need to -provide compatibility with RHEl6. 
cpu64-rhel6 was documented as -deprecated and scheduled for removal in 8.2, so now it's time to -remove it. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 26 +------------------------- - 1 file changed, 1 insertion(+), 25 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 790db77..6dce6f2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -- * The default on RHEL-6 was cpu64-rhel6. -- * libvirt assumes that qemu64 is the default for _all_ machine-types, -- * so we should try to keep qemu64 and cpu64-rhel6 as similar as -- * possible. -- */ -+ /* qemu64 is the default CPU model for all machine-types */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, -@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, - { -- .name = "cpu64-rhel6", -- .level = 4, -- .vendor = CPUID_VENDOR_AMD, -- .family = 6, -- .model = 13, -- .stepping = 3, -- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -- CPUID_PSE | CPUID_DE | CPUID_FP87, -- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -- .xlevel = 0x8000000A, -- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -- }, -- { - .name = "Conroe", - .level = 10, - .vendor = CPUID_VENDOR_INTEL, --- -1.8.3.1 - diff --git a/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch 
b/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch deleted file mode 100644 index 1027341..0000000 --- a/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +++ /dev/null @@ -1,95 +0,0 @@ -From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Thu, 5 Dec 2019 21:56:50 +0000 -Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default - -RH-Author: Eduardo Habkost -Message-id: <20191205225650.772600-2-ehabkost@redhat.com> -Patchwork-id: 92907 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default -Bugzilla: 1787291 1779078 1779078 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Igor Mammedov -RH-Acked-by: Paolo Bonzini - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 -Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> - -When using `query-cpu-definitions` using `-machine none`, -QEMU is resolving all CPU models to their latest versions. The -actual CPU model version being used by another machine type (e.g. -`pc-q35-4.0`) might be different. - -In theory, this was OK because the correct CPU model -version is returned when using the correct `-machine` argument. - -Except that in practice, this breaks libvirt expectations: -libvirt always use `-machine none` when checking if a CPU model -is runnable, because runnability is not expected to be affected -when the machine type is changed. - -For example, when running on a Haswell host without TSX, -Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, -`query-cpu-definitions` says Haswell is runnable if using -`-machine none`, but Haswell is actually not runnable using any -of the `pc-*` machine types (because they resolve Haswell to -Haswell-v1). In other words, we're breaking the "runnability -guarantee" we promised to not break for a few releases (see -qemu-deprecated.texi). 
- -To address this issue, change the default CPU model version to v1 -on all machine types, so we make `query-cpu-definitions` output -when using `-machine none` match the results when using `pc-*`. -This will change in the future (the plan is to always return the -latest CPU model version if using `-machine none`), but only -after giving libvirt the opportunity to adapt. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - qemu-deprecated.texi | 7 +++++++ - target/i386/cpu.c | 8 +++++++- - 2 files changed, 14 insertions(+), 1 deletion(-) - -diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi -index 4b4b742..534ebe9 100644 ---- a/qemu-deprecated.texi -+++ b/qemu-deprecated.texi -@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te - ``alias-of'' field returned by the ``query-cpu-definitions'' QMP - command. - -+While those guarantees are kept, the return value of -+``query-cpu-definitions'' will have existing CPU model aliases -+point to a version that doesn't break runnability guarantees -+(specifically, version 1 of those CPU models). In future QEMU -+versions, aliases will point to newer CPU model versions -+depending on the machine type, so management software must -+resolve CPU model aliases before starting a virtual machine. - - @node Recently removed features - @appendix Recently removed features -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6dce6f2..863192c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { - }; - - --X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; -+/* -+ * We resolve CPU model aliases using -v1 when using "-machine -+ * none", but this is just for compatibility while libvirt isn't -+ * adapted to resolve CPU model versions before creating VMs. -+ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. 
-+ */ -+X86CPUVersion default_cpu_version = 1; - - void x86_cpu_set_default_version(X86CPUVersion version) - { --- -1.8.3.1 - diff --git a/kvm-iotests-Add-iothread-cases-to-155.patch b/kvm-iotests-Add-iothread-cases-to-155.patch deleted file mode 100644 index 24ac90c..0000000 --- a/kvm-iotests-Add-iothread-cases-to-155.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2366cd9066e79d6c93a3a28710aea987b2c8f454 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:38 +0000 -Subject: [PATCH 18/20] iotests: Add iothread cases to 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-13-kwolf@redhat.com> -Patchwork-id: 94289 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch adds test cases for attaching the backing chain to a mirror -job target right before finalising the job, where the image is in a -non-mainloop AioContext (i.e. the backing chain needs to be moved to the -AioContext of the mirror target). - -This requires switching the test case from virtio-blk to virtio-scsi -because virtio-blk only actually starts using the iothreads when the -guest driver initialises the device (which never happens in a test case -without a guest OS). virtio-scsi always keeps its block nodes in the -AioContext of the the requested iothread without guest interaction. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-7-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 25 insertions(+), 11 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index 3053e50..b552d1f 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # chain opened right away. If False, blockdev-add - # opens it without a backing file and job completion - # is supposed to open the backing chain. -+# use_iothread: If True, an iothread is configured for the virtio-blk device -+# that uses the image being mirrored - - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None - target_open_with_backing = True -+ use_iothread = False - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): - 'file': {'driver': 'file', - 'filename': source_img}} - self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) -- self.vm.add_device('virtio-blk,id=qdev0,drive=source') -+ -+ if self.use_iothread: -+ self.vm.add_object('iothread,id=iothread0') -+ iothread = ",iothread=iothread0" -+ else: -+ iothread = "" -+ -+ self.vm.add_device('virtio-scsi%s' % iothread) -+ self.vm.add_device('scsi-hd,id=qdev0,drive=source') -+ - self.vm.launch() - - self.assertIntactSourceBackingChain() -@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): - def testFull(self): - self.runMirror('full') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, None) - self.assertIntactSourceBackingChain() - - def testTop(self): - self.runMirror('top') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - 
self.assertCorrectBackingImage(node, back2_img) - self.assertIntactSourceBackingChain() - - def testNone(self): - self.runMirror('none') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, source_img) - self.assertIntactSourceBackingChain() - -@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): - backing="backing") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): -+ use_iothread = True -+ - # Attach the backing chain only during completion, with blockdev-snapshot - class TestBlockdevMirrorSnapshot(MirrorBaseClass): - cmd = 'blockdev-mirror' -@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): - overlay="target") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): -+ use_iothread = True -+ - class TestCommit(BaseClass): - existing = False - -@@ -283,8 +298,7 @@ class TestCommit(BaseClass): - - self.vm.event_wait('BLOCK_JOB_COMPLETED') - -- node = self.findBlockNode(None, -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode(None, 'qdev0') - self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', - back1_img) - self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4fd1c2d..ed714d5 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --......................... -+............................... 
- ---------------------------------------------------------------------- --Ran 25 tests -+Ran 31 tests - - OK --- -1.8.3.1 - diff --git a/kvm-iotests-Add-test-for-image-creation-fallback.patch b/kvm-iotests-Add-test-for-image-creation-fallback.patch deleted file mode 100644 index a8ea8f7..0000000 --- a/kvm-iotests-Add-test-for-image-creation-fallback.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:46 +0000 -Subject: [PATCH 05/20] iotests: Add test for image creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> -Patchwork-id: 94228 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-6-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -[mreitz: Added a note that NBD does not support resizing, which is why - the second case is expected to fail] -Signed-off-by: Max Reitz -(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/259.out | 14 +++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 77 insertions(+) - create mode 100755 tests/qemu-iotests/259 - create mode 100644 tests/qemu-iotests/259.out - -diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 -new file mode 100755 -index 0000000..62e29af ---- /dev/null -+++ b/tests/qemu-iotests/259 -@@ -0,0 +1,62 @@ -+#!/usr/bin/env bash -+# -+# Test generic image creation fallback (by using NBD) -+# -+# Copyright (C) 2019 Red Hat, Inc. 
-+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=mreitz@redhat.com -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto nbd -+_supported_os Linux -+ -+ -+_make_test_img 64M -+ -+echo -+echo '--- Testing creation ---' -+ -+$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create -+$QEMU_IMG info "$TEST_IMG" | _filter_img_info -+ -+echo -+echo '--- Testing creation for which the node would need to grow ---' -+ -+# NBD does not support resizing, so this will fail -+$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ -+ | _filter_img_create -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out -new file mode 100644 -index 0000000..ffed19c ---- /dev/null -+++ b/tests/qemu-iotests/259.out -@@ -0,0 +1,14 @@ -+QA output created by 259 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+ -+--- Testing creation --- -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 -+image: TEST_DIR/t.IMGFMT -+file format: qcow2 -+virtual size: 64 MiB (67108864 bytes) -+disk size: unavailable -+ -+--- 
Testing creation for which the node would need to grow --- -+qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index c0e8197..e47cbfc 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -273,6 +273,7 @@ - 256 rw quick - 257 rw - 258 rw quick -+259 rw auto quick - 260 rw quick - 261 rw - 262 rw quick migration --- -1.8.3.1 - diff --git a/kvm-iotests-Create-VM.blockdev_create.patch b/kvm-iotests-Create-VM.blockdev_create.patch deleted file mode 100644 index 805b31a..0000000 --- a/kvm-iotests-Create-VM.blockdev_create.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:00 +0000 -Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-3-kwolf@redhat.com> -Patchwork-id: 93748 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -We have several almost identical copies of a blockdev_create() function -in different test cases. Time to create one unified function in -iotests.py. - -To keep the diff managable, this patch only creates the function and -follow-up patches will convert the individual test cases. - -Signed-off-by: Kevin Wolf -(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3cff671..5741efb 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): - elif status == 'null': - return error - -+ # Returns None on success, and an error string on failure -+ def blockdev_create(self, options, job_id='job0', filters=None): -+ if filters is None: -+ filters = [filter_qmp_testfiles] -+ result = self.qmp_log('blockdev-create', filters=filters, -+ job_id=job_id, options=options) -+ -+ if 'return' in result: -+ assert result['return'] == {} -+ job_result = self.run_job(job_id) -+ else: -+ job_result = result['error'] -+ -+ log("") -+ return job_result -+ - def enable_migration_events(self, name): - log('Enabling migration QMP events on %s...' % name) - log(self.qmp('migrate-set-capabilities', capabilities=[ --- -1.8.3.1 - diff --git a/kvm-iotests-Fix-run_job-with-use_log-False.patch b/kvm-iotests-Fix-run_job-with-use_log-False.patch deleted file mode 100644 index b105fc2..0000000 --- a/kvm-iotests-Fix-run_job-with-use_log-False.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:35 +0000 -Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-10-kwolf@redhat.com> -Patchwork-id: 94284 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -The 'job-complete' QMP command should be run with qmp() rather than -qmp_log() if use_log=False is passed. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-4-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/iotests.py | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 0c55f7b..46f880c 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): - if use_log: - log('Job failed: %s' % (j['error'])) - elif status == 'ready': -- self.qmp_log('job-complete', id=job) -+ if use_log: -+ self.qmp_log('job-complete', id=job) -+ else: -+ self.qmp('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch deleted file mode 100644 index 17e4a41..0000000 --- a/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:30 +0000 -Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-5-kwolf@redhat.com> -Patchwork-id: 94281 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -We'll want to test more than one successful case in the future, so -prepare the test for that by a refactoring that runs each scenario in a -separate VM. 
- -test_iothreads_switch_{backing,overlay} currently produce errors, but -these are cases that should actually work, by switching either the -backing file node or the overlay node to the AioContext of the other -node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- - tests/qemu-iotests/245.out | 4 ++-- - 2 files changed, 39 insertions(+), 12 deletions(-) - -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index e66a23c..f69c2fa 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): - self.assertEqual(self.get_node('hd1'), None) - self.assert_qmp(self.get_node('hd2'), 'ro', True) - -- # We don't allow setting a backing file that uses a different AioContext -- def test_iothreads(self): -+ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): - opts = hd_opts(0) - result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) - self.assert_qmp(result, 'return', {}) -@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): - result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') - self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', -+ iothread=iothread_a) - self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -- -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', -+ iothread=iothread_b) - 
self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -+ if iothread_a: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd0', -+ share_rw=True, bus="scsi0.0") -+ self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') -- self.assert_qmp(result, 'return', {}) -+ if iothread_b: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', -+ share_rw=True, bus="scsi1.0") -+ self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}) -+ # Attaching the backing file may or may not work -+ self.reopen(opts, {'backing': 'hd2'}, errmsg) -+ -+ # But removing the backing file should always work -+ self.reopen(opts, {'backing': None}) -+ -+ self.vm.shutdown() -+ -+ # We don't allow setting a backing file that uses a different AioContext if -+ # neither of them can switch to the other AioContext -+ def test_iothreads_error(self): -+ self.run_test_iothreads('iothread0', 'iothread1', -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_compatible_users(self): -+ self.run_test_iothreads('iothread0', 'iothread0') -+ -+ def test_iothreads_switch_backing(self): -+ self.run_test_iothreads('iothread0', None, -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_switch_overlay(self): -+ self.run_test_iothreads(None, 'iothread0', -+ "Cannot use a new backing file with a different AioContext") - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], -diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out -index a19de52..682b933 100644 ---- a/tests/qemu-iotests/245.out -+++ b/tests/qemu-iotests/245.out -@@ -1,6 +1,6 @@ --.................. -+..................... 
- ---------------------------------------------------------------------- --Ran 18 tests -+Ran 21 tests - - OK - {"execute": "job-finalize", "arguments": {"id": "commit0"}} --- -1.8.3.1 - diff --git a/kvm-iotests-Support-job-complete-in-run_job.patch b/kvm-iotests-Support-job-complete-in-run_job.patch deleted file mode 100644 index 08971a0..0000000 --- a/kvm-iotests-Support-job-complete-in-run_job.patch +++ /dev/null @@ -1,46 +0,0 @@ -From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:23:59 +0000 -Subject: [PATCH 02/18] iotests: Support job-complete in run_job() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-2-kwolf@redhat.com> -Patchwork-id: 93746 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Automatically complete jobs that have a 'ready' state and need an -explicit job-complete. Without this, run_job() would hang for such -jobs. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index df07089..3cff671 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): - error = j['error'] - if use_log: - log('Job failed: %s' % (j['error'])) -+ elif status == 'ready': -+ self.qmp_log('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/kvm-iotests-Test-external-snapshot-with-VM-state.patch deleted file mode 100644 index 6fcb2f6..0000000 --- a/kvm-iotests-Test-external-snapshot-with-VM-state.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:03 +0000 -Subject: [PATCH 06/18] iotests: Test external snapshot with VM state - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-6-kwolf@redhat.com> -Patchwork-id: 93752 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -This tests creating an external snapshot with VM state (which results in -an active overlay over an inactive backing file, which is also the root -node of an inactive BlockBackend), re-activating the images and -performing some operations to test that the re-activation worked as -intended. - -Signed-off-by: Kevin Wolf -(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 134 insertions(+) - create mode 100755 tests/qemu-iotests/280 - create mode 100644 tests/qemu-iotests/280.out - -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -new file mode 100755 -index 0000000..0b1fa8e ---- /dev/null -+++ b/tests/qemu-iotests/280 -@@ -0,0 +1,83 @@ -+#!/usr/bin/env python -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+# Creator/Owner: Kevin Wolf -+# -+# Test migration to file for taking an external snapshot with VM state. 
-+ -+import iotests -+import os -+ -+iotests.verify_image_format(supported_fmts=['qcow2']) -+iotests.verify_protocol(supported=['file']) -+iotests.verify_platform(['linux']) -+ -+with iotests.FilePath('base') as base_path , \ -+ iotests.FilePath('top') as top_path, \ -+ iotests.VM() as vm: -+ -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') -+ -+ iotests.log('=== Launch VM ===') -+ vm.add_object('iothread,id=iothread0') -+ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) -+ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) -+ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') -+ vm.launch() -+ -+ vm.enable_migration_events('VM') -+ -+ iotests.log('\n=== Migrate to file ===') -+ vm.qmp_log('migrate', uri='exec:cat > /dev/null') -+ -+ with iotests.Timeout(3, 'Migration does not complete'): -+ vm.wait_migration() -+ -+ iotests.log('\nVM is now stopped:') -+ iotests.log(vm.qmp('query-migrate')['return']['status']) -+ vm.qmp_log('query-status') -+ -+ iotests.log('\n=== Create a snapshot of the disk image ===') -+ vm.blockdev_create({ -+ 'driver': 'file', -+ 'filename': top_path, -+ 'size': 0, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-file', -+ driver='file', filename=top_path, -+ filters=[iotests.filter_qmp_testfiles]) -+ -+ vm.blockdev_create({ -+ 'driver': iotests.imgfmt, -+ 'file': 'top-file', -+ 'size': 1024 * 1024, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-fmt', -+ driver=iotests.imgfmt, file='top-file') -+ -+ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') -+ -+ iotests.log('\n=== Resume the VM and simulate a write request ===') -+ vm.qmp_log('cont') -+ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) -+ -+ iotests.log('\n=== Commit it to the backing file ===') -+ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, -+ device='top-fmt', top_node='top-fmt', -+ filters=[iotests.filter_qmp_testfiles]) -+ 
if 'return' in result: -+ vm.run_job('job0') -diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out -new file mode 100644 -index 0000000..5d382fa ---- /dev/null -+++ b/tests/qemu-iotests/280.out -@@ -0,0 +1,50 @@ -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+=== Launch VM === -+Enabling migration QMP events on VM... -+{"return": {}} -+ -+=== Migrate to file === -+{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} -+{"return": {}} -+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+ -+VM is now stopped: -+completed -+{"execute": "query-status", "arguments": {}} -+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} -+ -+=== Create a snapshot of the disk image === -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} -+{"return": {}} -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} -+{"return": {}} -+{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} -+{"return": {}} -+ -+=== Resume the VM and simulate a write request === -+{"execute": 
"cont", "arguments": {}} -+{"return": {}} -+{"return": ""} -+ -+=== Commit it to the backing file === -+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} -+{"return": {}} -+{"execute": "job-complete", "arguments": {"id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 06cc734..01301cd 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -286,3 +286,4 @@ - 272 rw - 273 backing quick - 277 rw quick -+280 rw migration quick --- -1.8.3.1 - diff --git a/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch deleted file mode 100644 index b09439b..0000000 --- a/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +++ /dev/null @@ -1,322 +0,0 @@ -From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 10:27:49 +0000 -Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some - blockdev actions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-10-slp@redhat.com> -Patchwork-id: 93762 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Includes the following tests: - - - Adding 
a dirty bitmap. - * RHBZ: 1782175 - - - Starting a drive-mirror to an NBD-backed target. - * RHBZ: 1746217, 1773517 - - - Aborting an external snapshot transaction. - * RHBZ: 1779036 - - - Aborting a blockdev backup transaction. - * RHBZ: 1782111 - -For each one of them, a VM with a number of disks running in an -IOThread AioContext is used. - -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/281.out | 5 + - tests/qemu-iotests/group | 1 + - 3 files changed, 253 insertions(+) - create mode 100755 tests/qemu-iotests/281 - create mode 100644 tests/qemu-iotests/281.out - -diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 -new file mode 100755 -index 0000000..269d583 ---- /dev/null -+++ b/tests/qemu-iotests/281 -@@ -0,0 +1,247 @@ -+#!/usr/bin/env python -+# -+# Test cases for blockdev + IOThread interactions -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+ -+import os -+import iotests -+from iotests import qemu_img -+ -+image_len = 64 * 1024 * 1024 -+ -+# Test for RHBZ#1782175 -+class TestDirtyBitmapIOThread(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ images = { 'drive0': drive0_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_add_dirty_bitmap(self): -+ result = self.vm.qmp( -+ 'block-dirty-bitmap-add', -+ node='drive0', -+ name='bitmap1', -+ persistent=True, -+ ) -+ -+ self.assert_qmp(result, 'return', {}) -+ -+ -+# Test for RHBZ#1746217 & RHBZ#1773517 -+class TestNBDMirrorIOThread(iotests.QMPTestCase): -+ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') -+ images = { 'drive0': drive0_img, 'mirror': mirror_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm_src = iotests.VM(path_suffix='src') -+ self.vm_src.add_object('iothread,id=iothread0') -+ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_src.launch() -+ self.vm_src.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ 
force=True) -+ -+ self.vm_tgt = iotests.VM(path_suffix='tgt') -+ self.vm_tgt.add_object('iothread,id=iothread0') -+ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.mirror_img)) -+ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_tgt.launch() -+ self.vm_tgt.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm_src.shutdown() -+ self.vm_tgt.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_nbd_mirror(self): -+ result = self.vm_tgt.qmp( -+ 'nbd-server-start', -+ addr={ -+ 'type': 'unix', -+ 'data': { 'path': self.nbd_sock } -+ } -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_tgt.qmp( -+ 'nbd-server-add', -+ device='drive0', -+ writable=True -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_src.qmp( -+ 'drive-mirror', -+ device='drive0', -+ target='nbd+unix:///drive0?socket=' + self.nbd_sock, -+ sync='full', -+ mode='existing', -+ speed=64*1024*1024, -+ job_id='j1' -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ self.vm_src.event_wait(name="BLOCK_JOB_READY") -+ -+ -+# Test for RHBZ#1779036 -+class TestExternalSnapshotAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') -+ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() 
-+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_external_snapshot_abort(self): -+ # Use a two actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': self.snapshot_img, -+ 'snapshot-node-name': 'snap1', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': '/fakesnapshot', -+ 'snapshot-node-name': 'snap2', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ ]) -+ -+ # Crashes on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ -+# Test for RHBZ#1782111 -+class TestBlockdevBackupAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') -+ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') -+ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') -+ images = { 'drive0': drive0_img, -+ 'drive1': drive1_img, -+ 'snap0': snap0_img, -+ 'snap1': snap1_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_device('virtio-scsi,iothread=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.add_device('scsi-hd,drive=drive0') -+ self.vm.add_device('scsi-hd,drive=drive1') -+ self.vm.launch() -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_blockdev_backup_abort(self): -+ # Use a two 
actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive0', -+ 'target': 'snap0', -+ 'sync': 'full', -+ 'job-id': 'j1' } -+ }, -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive1', -+ 'target': 'snap1', -+ 'sync': 'full' } -+ }, -+ ]) -+ -+ # Hangs on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+if __name__ == '__main__': -+ iotests.main(supported_fmts=['qcow2'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out -new file mode 100644 -index 0000000..89968f3 ---- /dev/null -+++ b/tests/qemu-iotests/281.out -@@ -0,0 +1,5 @@ -+.... -+---------------------------------------------------------------------- -+Ran 4 tests -+ -+OK -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 01301cd..c0e8197 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -287,3 +287,4 @@ - 273 backing quick - 277 rw quick - 280 rw migration quick -+281 rw quick --- -1.8.3.1 - diff --git a/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch deleted file mode 100644 index 58ef198..0000000 --- a/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:36 +0000 -Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target - backing file - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-11-kwolf@redhat.com> -Patchwork-id: 94288 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Peter Krempa - -The newly tested scenario is a common live storage migration scenario: -The target node is opened without a backing file so that the active -layer is mirrored while its backing chain can be copied in the -background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client. - -This patch adds test cases for two ways to achieve the desired result, -using either x-blockdev-reopen or blockdev-snapshot. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-5-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 53 insertions(+), 7 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index d7ef257..3053e50 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # image during runtime, only makes sense if - # target_blockdev_backing is not None - # (None: same as target_backing) -+# target_open_with_backing: If True, the target image is added with its backing -+# chain opened right away. If False, blockdev-add -+# opens it without a backing file and job completion -+# is supposed to open the backing chain. 
- - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None -+ target_open_with_backing = True - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): - options = { 'node-name': 'target', - 'driver': iotests.imgfmt, - 'file': { 'driver': 'file', -+ 'node-name': 'target-file', - 'filename': target_img } } -- if self.target_blockdev_backing: -- options['backing'] = self.target_blockdev_backing -+ -+ if not self.target_open_with_backing: -+ options['backing'] = None -+ elif self.target_blockdev_backing: -+ options['backing'] = self.target_blockdev_backing - - result = self.vm.qmp('blockdev-add', **options) - self.assert_qmp(result, 'return', {}) -@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): - # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror - - class MirrorBaseClass(BaseClass): -+ def openBacking(self): -+ pass -+ - def runMirror(self, sync): - if self.cmd == 'blockdev-mirror': - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', -- sync=sync, target='target') -+ sync=sync, target='target', -+ auto_finalize=False) - else: - if self.existing: - mode = 'existing' -@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', - sync=sync, target=target_img, - format=iotests.imgfmt, mode=mode, -- node_name='target') -+ node_name='target', auto_finalize=False) - - self.assert_qmp(result, 'return', {}) - -- self.complete_and_wait('mirror-job') -+ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, -+ pre_finalize=self.openBacking, auto_dismiss=True) - - def testFull(self): - self.runMirror('full') -@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): - target_blockdev_backing = { 'driver': 'null-co' } - target_real_backing = 'null-co://' - -+# Attach the backing chain only during 
completion, with blockdev-reopen -+class TestBlockdevMirrorReopen(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('x-blockdev-reopen', node_name="target", -+ driver=iotests.imgfmt, file="target-file", -+ backing="backing") -+ self.assert_qmp(result, 'return', {}) -+ -+# Attach the backing chain only during completion, with blockdev-snapshot -+class TestBlockdevMirrorSnapshot(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('blockdev-snapshot', node="backing", -+ overlay="target") -+ self.assert_qmp(result, 'return', {}) - - class TestCommit(BaseClass): - existing = False -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4176bb9..4fd1c2d 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --................... -+......................... 
- ---------------------------------------------------------------------- --Ran 19 tests -+Ran 25 tests - - OK --- -1.8.3.1 - diff --git a/kvm-iotests-Use-complete_and_wait-in-155.patch b/kvm-iotests-Use-complete_and_wait-in-155.patch deleted file mode 100644 index 38b41be..0000000 --- a/kvm-iotests-Use-complete_and_wait-in-155.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:27 +0000 -Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-2-kwolf@redhat.com> -Patchwork-id: 94279 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Max Reitz - -This way, we get to see errors during the completion phase. - -Signed-off-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200218103454.296704-14-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/155 | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index e194859..d7ef257 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): - - self.assert_qmp(result, 'return', {}) - -- self.vm.event_wait('BLOCK_JOB_READY') -- -- result = self.vm.qmp('block-job-complete', device='mirror-job') -- self.assert_qmp(result, 'return', {}) -- -- self.vm.event_wait('BLOCK_JOB_COMPLETED') -+ self.complete_and_wait('mirror-job') - - def testFull(self): - self.runMirror('full') --- -1.8.3.1 - diff --git a/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/kvm-iotests.py-Let-wait_migration-wait-even-more.patch deleted file mode 100644 index cda8037..0000000 --- a/kvm-iotests.py-Let-wait_migration-wait-even-more.patch +++ /dev/null @@ -1,123 +0,0 @@ -From d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:04 +0000 -Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-7-kwolf@redhat.com> -Patchwork-id: 93751 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -The "migration completed" event may be sent (on the source, to be -specific) before the migration is actually completed, so the VM runstate -will still be "finish-migrate" instead of "postmigrate". So ask the -users of VM.wait_migration() to specify the final runstate they desire -and then poll the VM until it has reached that state. (This should be -over very quickly, so busy polling is fine.) - -Without this patch, I see intermittent failures in the new iotest 280 -under high system load. 
I have not yet seen such failures with other -iotests that use VM.wait_migration() and query-status afterwards, but -maybe they just occur even more rarely, or it is because they also wait -on the destination VM to be running. - -Signed-off-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/234 | 8 ++++---- - tests/qemu-iotests/262 | 4 ++-- - tests/qemu-iotests/280 | 2 +- - tests/qemu-iotests/iotests.py | 6 +++++- - 4 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 -index 34c818c..59a7f94 100755 ---- a/tests/qemu-iotests/234 -+++ b/tests/qemu-iotests/234 -@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_b.wait_migration() -+ vm_b.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_a.wait_migration() -+ vm_a.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 -index 0963daa..bbcb526 100755 ---- a/tests/qemu-iotests/262 -+++ 
b/tests/qemu-iotests/262 -@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -index 0b1fa8e..85e9114 100755 ---- a/tests/qemu-iotests/280 -+++ b/tests/qemu-iotests/280 -@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ - vm.qmp_log('migrate', uri='exec:cat > /dev/null') - - with iotests.Timeout(3, 'Migration does not complete'): -- vm.wait_migration() -+ vm.wait_migration('postmigrate') - - iotests.log('\nVM is now stopped:') - iotests.log(vm.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 5741efb..0c55f7b 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): - } - ])) - -- def wait_migration(self): -+ def wait_migration(self, expect_runstate): - while True: - event = self.event_wait('MIGRATION') - log(event, filters=[filter_qmp_event]) - if event['data']['status'] == 'completed': - break -+ # The event may occur in finish-migrate, so wait for the expected -+ # post-migration runstate -+ while self.qmp('query-status')['return']['status'] != expect_runstate: -+ pass - - def node_info(self, node_name): - nodes = self.qmp('query-named-block-nodes') --- -1.8.3.1 - diff --git a/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch deleted file mode 100644 index 2ee9dcd..0000000 --- 
a/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Wed, 29 Jan 2020 13:45:18 +0000 -Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS - (CVE-2020-1711) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200129134518.1293-2-jmaloy@redhat.com> -Patchwork-id: 93571 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) -Bugzilla: 1794503 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf -RH-Acked-by: Philippe Mathieu-Daudé - -From: Felipe Franciosi - -When querying an iSCSI server for the provisioning status of blocks (via -GET LBA STATUS), Qemu only validates that the response descriptor zero's -LBA matches the one requested. Given the SCSI spec allows servers to -respond with the status of blocks beyond the end of the LUN, Qemu may -have its heap corrupted by clearing/setting too many bits at the end of -its allocmap for the LUN. - -A malicious guest in control of the iSCSI server could carefully program -Qemu's heap (by selectively setting the bitmap) and then smash it. - -This limits the number of bits that iscsi_co_block_status() will try to -update in the allocmap so it can't overflow the bitmap. - -Fixes: CVE-2020-1711 -Cc: qemu-stable@nongnu.org -Signed-off-by: Felipe Franciosi -Signed-off-by: Peter Turschmid -Signed-off-by: Raphael Norwitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - block/iscsi.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index 2aea7e3..cbd5729 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - struct scsi_get_lba_status *lbas = NULL; - struct scsi_lba_status_descriptor *lbasd = NULL; - struct IscsiTask iTask; -- uint64_t lba; -+ uint64_t lba, max_bytes; - int ret; - - iscsi_co_init_iscsitask(iscsilun, &iTask); -@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - } - - lba = offset / iscsilun->block_size; -+ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; - - qemu_mutex_lock(&iscsilun->mutex); - retry: -@@ -764,7 +765,7 @@ retry: - goto out_unlock; - } - -- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; -+ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); - - if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || - lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { --- -1.8.3.1 - diff --git a/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/kvm-iscsi-Drop-iscsi_co_create_opts.patch deleted file mode 100644 index a6d0baf..0000000 --- a/kvm-iscsi-Drop-iscsi_co_create_opts.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:45 +0000 -Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> -Patchwork-id: 94226 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. 
- -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-5-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/iscsi.c | 56 -------------------------------------------------------- - 1 file changed, 56 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index cbd5729..b45da65 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, - return 0; - } - --static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int ret = 0; -- int64_t total_size = 0; -- BlockDriverState *bs; -- IscsiLun *iscsilun = NULL; -- QDict *bs_options; -- Error *local_err = NULL; -- -- bs = bdrv_new(); -- -- /* Read out options */ -- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- bs->opaque = g_new0(struct IscsiLun, 1); -- iscsilun = bs->opaque; -- -- bs_options = qdict_new(); -- iscsi_parse_filename(filename, bs_options, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- ret = -EINVAL; -- } else { -- ret = iscsi_open(bs, bs_options, 0, NULL); -- } -- qobject_unref(bs_options); -- -- if (ret != 0) { -- goto out; -- } -- iscsi_detach_aio_context(bs); -- if (iscsilun->type != TYPE_DISK) { -- ret = -ENODEV; -- goto out; -- } -- if (bs->total_sectors < total_size) { -- ret = -ENOSPC; -- goto out; -- } -- -- ret = 0; --out: -- if (iscsilun->iscsi != NULL) { -- iscsi_destroy_context(iscsilun->iscsi); -- } -- g_free(bs->opaque); -- bs->opaque = NULL; -- bdrv_unref(bs); -- return ret; --} -- - static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - { - IscsiLun *iscsilun = bs->opaque; -@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, 
- .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, --- -1.8.3.1 - diff --git a/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch deleted file mode 100644 index e38428b..0000000 --- a/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:12 +0100 -Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-2-kwolf@redhat.com> -Patchwork-id: 94597 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All callers of job_txn_apply hold a single job's lock, but different -jobs within a transaction can have different contexts, thus we need to -lock each one individually before applying the callback function. - -Similar to job_completed_txn_abort this also requires releasing the -caller's context before and reacquiring it after to avoid recursive -locks which might break AIO_WAIT_WHILE in the callback. 
This is safe, since -existing code would already have to take this into account, lest -job_completed_txn_abort might have broken. - -This also brings to light a different issue: When a callback function in -job_txn_apply moves it's job to a different AIO context, callers will -try to release the wrong lock (now that we re-acquire the lock -correctly, previously it would just continue with the old lock, leaving -the job unlocked for the rest of the return path). Fix this by not caching -the job's context. - -This is only necessary for qmp_block_job_finalize, qmp_job_finalize and -job_exit, since everyone else calls through job_exit. - -One test needed adapting, since it calls job_finalize directly, so it -manually needs to acquire the correct context. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 9 +++++++++ - job-qmp.c | 9 +++++++++ - job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- - tests/test-blockjob.c | 2 ++ - 4 files changed, 60 insertions(+), 10 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index c8d4b51..86eb115 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) - } - - trace_qmp_block_job_finalize(job); -+ job_ref(&job->job); - job_finalize(&job->job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. 
-+ */ -+ aio_context = blk_get_aio_context(job->blk); -+ job_unref(&job->job); - aio_context_release(aio_context); - } - -diff --git a/job-qmp.c b/job-qmp.c -index fbfed25..a201220 100644 ---- a/job-qmp.c -+++ b/job-qmp.c -@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) - } - - trace_qmp_job_finalize(job); -+ job_ref(job); - job_finalize(job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. -+ */ -+ aio_context = job->aio_context; -+ job_unref(job); - aio_context_release(aio_context); - } - -diff --git a/job.c b/job.c -index 04409b4..48fc4ad 100644 ---- a/job.c -+++ b/job.c -@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) - } - } - --static int job_txn_apply(JobTxn *txn, int fn(Job *)) -+static int job_txn_apply(Job *job, int fn(Job *)) - { -- Job *job, *next; -+ AioContext *inner_ctx; -+ Job *other_job, *next; -+ JobTxn *txn = job->txn; - int rc = 0; - -- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { -- rc = fn(job); -+ /* -+ * Similar to job_completed_txn_abort, we take each job's lock before -+ * applying fn, but since we assume that outer_ctx is held by the caller, -+ * we need to release it here to avoid holding the lock twice - which would -+ * break AIO_WAIT_WHILE from within fn. -+ */ -+ job_ref(job); -+ aio_context_release(job->aio_context); -+ -+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { -+ inner_ctx = other_job->aio_context; -+ aio_context_acquire(inner_ctx); -+ rc = fn(other_job); -+ aio_context_release(inner_ctx); - if (rc) { - break; - } - } -+ -+ /* -+ * Note that job->aio_context might have been changed by calling fn, so we -+ * can't use a local variable to cache it. 
-+ */ -+ aio_context_acquire(job->aio_context); -+ job_unref(job); - return rc; - } - -@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) - assert(job && job->txn); - - /* prepare the transaction to complete */ -- rc = job_txn_apply(job->txn, job_prepare); -+ rc = job_txn_apply(job, job_prepare); - if (rc) { - job_completed_txn_abort(job); - } else { -- job_txn_apply(job->txn, job_finalize_single); -+ job_txn_apply(job, job_finalize_single); - } - } - -@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) - assert(other_job->ret == 0); - } - -- job_txn_apply(txn, job_transition_to_pending); -+ job_txn_apply(job, job_transition_to_pending); - - /* If no jobs need manual finalization, automatically do so */ -- if (job_txn_apply(txn, job_needs_finalize) == 0) { -+ if (job_txn_apply(job, job_needs_finalize) == 0) { - job_do_finalize(job); - } - } -@@ -849,9 +870,10 @@ static void job_completed(Job *job) - static void job_exit(void *opaque) - { - Job *job = (Job *)opaque; -- AioContext *ctx = job->aio_context; -+ AioContext *ctx; - -- aio_context_acquire(ctx); -+ job_ref(job); -+ aio_context_acquire(job->aio_context); - - /* This is a lie, we're not quiescent, but still doing the completion - * callbacks. However, completion callbacks tend to involve operations that -@@ -862,6 +884,14 @@ static void job_exit(void *opaque) - - job_completed(job); - -+ /* -+ * Note that calling job_completed can move the job to a different -+ * aio_context, so we cannot cache from above. job_txn_apply takes care of -+ * acquiring the new lock, and we ref/unref to avoid job_completed freeing -+ * the job underneath us. 
-+ */ -+ ctx = job->aio_context; -+ job_unref(job); - aio_context_release(ctx); - } - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index 7844c9f..6d857fd 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) - aio_poll(qemu_get_aio_context(), true); - assert(job->status == JOB_STATUS_PENDING); - -+ aio_context_acquire(job->aio_context); - job_finalize(job, &error_abort); -+ aio_context_release(job->aio_context); - assert(job->status == JOB_STATUS_CONCLUDED); - - cancel_common(s); --- -1.8.3.1 - diff --git a/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch deleted file mode 100644 index e362efe..0000000 --- a/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +++ /dev/null @@ -1,117 +0,0 @@ -From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:09 +0100 -Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-95-dgilbert@redhat.com> -Patchwork-id: 93548 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -If a new setmemtable command comes in once the vhost threads are -running, it will remap the guests address space and the threads -will now be looking in the wrong place. - -Fortunately we're running this command under lock, so we can -update the queue mappings so that threads will look in the new-right -place. - -Note: This doesn't fix things that the threads might be doing -without a lock (e.g. a readv/writev!) That's for another time. - -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index 63e4106..b89bf18 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) - } - - static bool -+map_ring(VuDev *dev, VuVirtq *vq) -+{ -+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); -+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); -+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); -+ -+ DPRINT("Setting virtq addresses:\n"); -+ DPRINT(" vring_desc at %p\n", vq->vring.desc); -+ DPRINT(" vring_used at %p\n", vq->vring.used); -+ DPRINT(" vring_avail at %p\n", vq->vring.avail); -+ -+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); -+} -+ -+static bool - vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) - { - int i; -@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) - close(vmsg->fds[i]); - } - -+ for (i = 0; i < dev->max_queues; i++) { -+ if (dev->vq[i].vring.desc) { -+ if (map_ring(dev, &dev->vq[i])) { -+ vu_panic(dev, "remaping queue %d during setmemtable", i); -+ } -+ } -+ } -+ - return false; - } - -@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) - DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); - DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); - -+ vq->vra = *vra; - vq->vring.flags = vra->flags; -- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); -- vq->vring.used = qva_to_va(dev, vra->used_user_addr); -- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); - vq->vring.log_guest_addr = 
vra->log_guest_addr; - -- DPRINT("Setting virtq addresses:\n"); -- DPRINT(" vring_desc at %p\n", vq->vring.desc); -- DPRINT(" vring_used at %p\n", vq->vring.used); -- DPRINT(" vring_avail at %p\n", vq->vring.avail); - -- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { -+ if (map_ring(dev, vq)) { - vu_panic(dev, "Invalid vring_addr message"); - return false; - } -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 1844b6f..5cb7708 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -327,6 +327,9 @@ typedef struct VuVirtq { - int err_fd; - unsigned int enable; - bool started; -+ -+ /* Guest addresses of our ring */ -+ struct vhost_vring_addr vra; - } VuVirtq; - - enum VuWatchCondtion { --- -1.8.3.1 - diff --git a/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch b/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch deleted file mode 100644 index 3477af5..0000000 --- a/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:56 +0000 -Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into - uint32_t - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-3-peterx@redhat.com> -Patchwork-id: 93629 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -It was always used as 32bit, so define it as used to be clear. -Instead of using -1 as the auto-gen magic value, we switch to -UINT32_MAX. We also make sure that we don't auto-gen this value to -avoid overflowed instance IDs without being noticed. 
- -Suggested-by: Juan Quintela -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - include/migration/vmstate.h - migration/savevm.c - stubs/vmstate.c - Due to missing 3cad405bab ("vmstate: replace DeviceState with - VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/apic_common.c | 2 +- - include/migration/register.h | 2 +- - include/migration/vmstate.h | 2 +- - migration/savevm.c | 18 ++++++++++-------- - stubs/vmstate.c | 2 +- - 5 files changed, 14 insertions(+), 12 deletions(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index f2c3a7f..54b8731 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- int instance_id = s->id; -+ uint32_t instance_id = s->id; - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); -diff --git a/include/migration/register.h b/include/migration/register.h -index a13359a..f3ba10b 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque); -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index 883f1cf..296609c 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - #define VMSTATE_INSTANCE_ID_ANY -1 - - /* Returns: 0 on success, -1 on failure */ --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int 
vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, -diff --git a/migration/savevm.c b/migration/savevm.c -index e2e8e0a..a80bb52 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -233,7 +233,7 @@ typedef struct CompatEntry { - typedef struct SaveStateEntry { - QTAILQ_ENTRY(SaveStateEntry) entry; - char idstr[256]; -- int instance_id; -+ uint32_t instance_id; - int alias_id; - int version_id; - /* version id read from the stream */ -@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) - fclose(out_file); - } - --static int calculate_new_instance_id(const char *idstr) -+static uint32_t calculate_new_instance_id(const char *idstr) - { - SaveStateEntry *se; -- int instance_id = 0; -+ uint32_t instance_id = 0; - - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (strcmp(idstr, se->idstr) == 0 -@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) - instance_id = se->instance_id + 1; - } - } -+ /* Make sure we never loop over without being noticed */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - return instance_id; - } - -@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) - Meanwhile pass -1 as instance_id if you do not already have a clearly - distinguishing id for all instances of your device class. 
*/ - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque) -@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) - } - } - --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *opaque, int alias_id, - int required_for_version, -@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) - return qemu_file_get_error(f); - } - --static SaveStateEntry *find_se(const char *idstr, int instance_id) -+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) - { - SaveStateEntry *se; - -@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - /* Find savevm section */ - se = find_se(idstr, instance_id); - if (se == NULL) { -- error_report("Unknown savevm section or instance '%s' %d. " -+ error_report("Unknown savevm section or instance '%s' %"PRIu32". 
" - "Make sure that your current VM setup matches your " - "saved VM setup, including any hotplugged devices", - idstr, instance_id); -@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - - ret = vmstate_load(f, se); - if (ret < 0) { -- error_report("error while loading state for instance 0x%x of" -+ error_report("error while loading state for instance 0x%"PRIx32" of" - " device '%s'", instance_id, idstr); - return ret; - } -diff --git a/stubs/vmstate.c b/stubs/vmstate.c -index e1e89b8..4ed5cc6 100644 ---- a/stubs/vmstate.c -+++ b/stubs/vmstate.c -@@ -4,7 +4,7 @@ - const VMStateDescription vmstate_dummy = {}; - - int vmstate_register_with_alias_id(DeviceState *dev, -- int instance_id, -+ uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, --- -1.8.3.1 - diff --git a/kvm-migration-Create-migration_is_running.patch b/kvm-migration-Create-migration_is_running.patch deleted file mode 100644 index c9593de..0000000 --- a/kvm-migration-Create-migration_is_running.patch +++ /dev/null @@ -1,119 +0,0 @@ -From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:41 +0000 -Subject: [PATCH 09/18] migration: Create migration_is_running() - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-9-quintela@redhat.com> -Patchwork-id: 94115 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -This function returns true if we are in the middle of a migration. -It is like migration_is_setup_or_active() with CANCELLING and COLO. -Adapt all callers that are needed. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 29 ++++++++++++++++++++++++----- - migration/migration.h | 1 + - migration/savevm.c | 4 +--- - 3 files changed, 26 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 30c53c6..eb50d77 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) - } - } - -+bool migration_is_running(int state) -+{ -+ switch (state) { -+ case MIGRATION_STATUS_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_PAUSED: -+ case MIGRATION_STATUS_POSTCOPY_RECOVER: -+ case MIGRATION_STATUS_SETUP: -+ case MIGRATION_STATUS_PRE_SWITCHOVER: -+ case MIGRATION_STATUS_DEVICE: -+ case MIGRATION_STATUS_WAIT_UNPLUG: -+ case MIGRATION_STATUS_CANCELLING: -+ case MIGRATION_STATUS_COLO: -+ return true; -+ -+ default: -+ return false; -+ -+ } -+} -+ - static void populate_time_info(MigrationInfo *info, MigrationState *s) - { - info->has_status = true; -@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - MigrationCapabilityStatusList *cap; - bool cap_list[MIGRATION_CAPABILITY__MAX]; - -- if (migration_is_setup_or_active(s->state)) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } -@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) - - do { - old_state = s->state; -- if (!migration_is_setup_or_active(old_state)) { -+ if (!migration_is_running(old_state)) { - break; - } - /* If the migration is paused, kick it out of the pause */ -@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - return true; - } - -- if (migration_is_setup_or_active(s->state) || -- s->state == MIGRATION_STATUS_CANCELLING || -- s->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return false; - } -diff --git 
a/migration/migration.h b/migration/migration.h -index 0b1b0d4..a2b2336 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); - void migrate_fd_connect(MigrationState *s, Error *error_in); - - bool migration_is_setup_or_active(int state); -+bool migration_is_running(int state); - - void migrate_init(MigrationState *s); - bool migration_is_blocked(Error **errp); -diff --git a/migration/savevm.c b/migration/savevm.c -index a80bb52..144ecf0 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - MigrationState *ms = migrate_get_current(); - MigrationStatus status; - -- if (migration_is_setup_or_active(ms->state) || -- ms->state == MIGRATION_STATUS_CANCELLING || -- ms->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(ms->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch deleted file mode 100644 index c2ead53..0000000 --- a/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +++ /dev/null @@ -1,257 +0,0 @@ -From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:55 +0000 -Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-2-peterx@redhat.com> -Patchwork-id: 93630 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to -auto-generate the vmstate instance ID. Previously it was hard coded -as -1 instead of this macro. It helps to change this default value in -the follow up patches. 
No functional change. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - backends/dbus-vmstate.c - File deleted - hw/core/qdev.c - hw/misc/max111x.c - hw/net/eepro100.c - Due to missing commit 3cad405bab ("vmstate: replace - DeviceState with VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/stellaris.c | 2 +- - hw/core/qdev.c | 3 ++- - hw/display/ads7846.c | 2 +- - hw/i2c/core.c | 2 +- - hw/input/stellaris_input.c | 3 ++- - hw/intc/apic_common.c | 2 +- - hw/misc/max111x.c | 2 +- - hw/net/eepro100.c | 2 +- - hw/pci/pci.c | 2 +- - hw/ppc/spapr.c | 2 +- - hw/timer/arm_timer.c | 2 +- - hw/tpm/tpm_emulator.c | 3 ++- - include/migration/vmstate.h | 2 ++ - migration/savevm.c | 8 ++++---- - 14 files changed, 21 insertions(+), 16 deletions(-) - -diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c -index b198066..bb025e0 100644 ---- a/hw/arm/stellaris.c -+++ b/hw/arm/stellaris.c -@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, - memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); - memory_region_add_subregion(get_system_memory(), base, &s->iomem); - ssys_reset(s); -- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); - return 0; - } - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index cf1ba28..40f6b2b 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) - dev->canonical_path = object_get_canonical_path(OBJECT(dev)); - - if (qdev_get_vmsd(dev)) { -- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, -+ if (vmstate_register_with_alias_id(dev, VMSTATE_INSTANCE_ID_ANY, -+ qdev_get_vmsd(dev), dev, - dev->instance_id_alias, - 
dev->alias_required_for_version, - &local_err) < 0) { -diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c -index c12272a..9228b40 100644 ---- a/hw/display/ads7846.c -+++ b/hw/display/ads7846.c -@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) - - ads7846_int_update(s); - -- vmstate_register(NULL, -1, &vmstate_ads7846, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); - } - - static void ads7846_class_init(ObjectClass *klass, void *data) -diff --git a/hw/i2c/core.c b/hw/i2c/core.c -index 92cd489..d770035 100644 ---- a/hw/i2c/core.c -+++ b/hw/i2c/core.c -@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) - - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); - QLIST_INIT(&bus->current_devs); -- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); - return bus; - } - -diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c -index 59892b0..e6ee5e1 100644 ---- a/hw/input/stellaris_input.c -+++ b/hw/input/stellaris_input.c -@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) - } - s->num_buttons = n; - qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); -- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_stellaris_gamepad, s); - } -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 375cb6a..f2c3a7f 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - } - - if (s->legacy_instance_id) { -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, - s, -1, 0, NULL); -diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c -index a713149..81ee73e 100644 ---- a/hw/misc/max111x.c -+++ b/hw/misc/max111x.c 
-@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) - s->input[7] = 0x80; - s->com = 0; - -- vmstate_register(dev, -1, &vmstate_max111x, s); -+ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); - return 0; - } - -diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c -index cc2dd8b..39920c6 100644 ---- a/hw/net/eepro100.c -+++ b/hw/net/eepro100.c -@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) - - s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); - s->vmstate->name = qemu_get_queue(s->nic)->model; -- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); -+ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); - } - - static void eepro100_instance_init(Object *obj) -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index cbc7a32..fed019d 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) - bus->machine_done.notify = pcibus_machine_done; - qemu_add_machine_init_done_notifier(&bus->machine_done); - -- vmstate_register(NULL, -1, &vmstate_pcibus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); - } - - static void pcie_bus_realize(BusState *qbus, Error **errp) -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 8749c72..c12862d 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) - * interface, this is a legacy from the sPAPREnvironment structure - * which predated MachineState but had a similar function */ - vmstate_register(NULL, 0, &vmstate_spapr, spapr); -- register_savevm_live("spapr/htab", -1, 1, -+ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_htab_handlers, spapr); - - qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), -diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c -index af524fa..beaa285 100644 ---- a/hw/timer/arm_timer.c -+++ 
b/hw/timer/arm_timer.c -@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) - s->control = TIMER_CTRL_IE; - - s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); -- vmstate_register(NULL, -1, &vmstate_arm_timer, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); - return s; - } - -diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c -index 22f9113..da7b490 100644 ---- a/hw/tpm/tpm_emulator.c -+++ b/hw/tpm/tpm_emulator.c -@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) - tpm_emu->cur_locty_number = ~0; - qemu_mutex_init(&tpm_emu->mutex); - -- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_tpm_emulator, obj); - } - - /* -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index ac4f46a..883f1cf 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, - - bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - -+#define VMSTATE_INSTANCE_ID_ANY -1 -+ - /* Returns: 0 on success, -1 on failure */ - int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - const VMStateDescription *vmsd, -diff --git a/migration/savevm.c b/migration/savevm.c -index a71b930..e2e8e0a 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, - - pstrcat(se->idstr, sizeof(se->idstr), idstr); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; -@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - - se->compat = g_new0(CompatEntry, 1); - pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); -- se->compat->instance_id = instance_id 
== -1 ? -+ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? - calculate_compat_instance_id(vmsd->name) : instance_id; -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - } - pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; --- -1.8.3.1 - diff --git a/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/kvm-migration-Don-t-send-data-if-we-have-stopped.patch deleted file mode 100644 index 9a36714..0000000 --- a/kvm-migration-Don-t-send-data-if-we-have-stopped.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:40 +0000 -Subject: [PATCH 08/18] migration: Don't send data if we have stopped - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-8-quintela@redhat.com> -Patchwork-id: 94114 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we do a cancel, we got out without one error, but we can't do the -rest of the output as in a normal situation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index a0257ee..902c56c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- if (ret >= 0) { -+ if (ret >= 0 -+ && migration_is_setup_or_active(migrate_get_current()->state)) { - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); --- -1.8.3.1 - diff --git a/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch deleted file mode 100644 index 01cb0f1..0000000 --- a/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:35 +0000 -Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case - of error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-3-quintela@redhat.com> -Patchwork-id: 94113 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we are exiting due to an error/finish/.... Just don't try to even -touch the channel with one IO operation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Juan Quintela -(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 65580e3..8c783b3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -899,6 +899,12 @@ struct { - uint64_t packet_num; - /* send channels ready */ - QemuSemaphore channels_ready; -+ /* -+ * Have we already run terminate threads. There is a race when it -+ * happens that we got one error while we are exiting. -+ * We will use atomic operations. Only valid values are 0 and 1. -+ */ -+ int exiting; - } *multifd_send_state; - - /* -@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) - MultiFDPages_t *pages = multifd_send_state->pages; - uint64_t transferred; - -+ if (atomic_read(&multifd_send_state->exiting)) { -+ return -1; -+ } -+ - qemu_sem_wait(&multifd_send_state->channels_ready); - for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { - p = &multifd_send_state->params[i]; -@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) - } - } - -+ /* -+ * We don't want to exit each threads twice. Depending on where -+ * we get the error, or if there are two independent errors in two -+ * threads at the same time, we can end calling this function -+ * twice. 
-+ */ -+ if (atomic_xchg(&multifd_send_state->exiting, 1)) { -+ return; -+ } -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) - - while (true) { - qemu_sem_wait(&p->sem); -+ -+ if (atomic_read(&multifd_send_state->exiting)) { -+ break; -+ } - qemu_mutex_lock(&p->mutex); - - if (p->pending_job) { -@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); - multifd_send_state->pages = multifd_pages_init(page_count); - qemu_sem_init(&multifd_send_state->channels_ready, 0); -+ atomic_set(&multifd_send_state->exiting, 0); - - for (i = 0; i < thread_count; i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; --- -1.8.3.1 - diff --git a/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch deleted file mode 100644 index 4a7fb28..0000000 --- a/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:43 +0000 -Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is - cancelled - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-11-quintela@redhat.com> -Patchwork-id: 94120 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the migration is cancelled when it is in the completion phase, -the migration state is set to MIGRATION_STATUS_CANCELLING. -The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause -function, so that VM always is paused. 
- -Reported-by: Euler Robot -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index eb50d77..ed18c59 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, - /* This block intentionally left blank */ - } - -- qemu_mutex_unlock_iothread(); -- migrate_set_state(&s->state, *current_active_state, -- MIGRATION_STATUS_PRE_SWITCHOVER); -- qemu_sem_wait(&s->pause_sem); -- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -- new_state); -- *current_active_state = new_state; -- qemu_mutex_lock_iothread(); -+ /* -+ * If the migration is cancelled when it is in the completion phase, -+ * the migration state is set to MIGRATION_STATUS_CANCELLING. -+ * So we don't need to wait a semaphore, otherwise we would always -+ * wait for the 'pause_sem' semaphore. -+ */ -+ if (s->state != MIGRATION_STATUS_CANCELLING) { -+ qemu_mutex_unlock_iothread(); -+ migrate_set_state(&s->state, *current_active_state, -+ MIGRATION_STATUS_PRE_SWITCHOVER); -+ qemu_sem_wait(&s->pause_sem); -+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -+ new_state); -+ *current_active_state = new_state; -+ qemu_mutex_lock_iothread(); -+ } - - return s->state == new_state ? 
0 : -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-migration-Rate-limit-inside-host-pages.patch b/kvm-migration-Rate-limit-inside-host-pages.patch deleted file mode 100644 index 2d3d519..0000000 --- a/kvm-migration-Rate-limit-inside-host-pages.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 17 Mar 2020 17:05:18 +0000 -Subject: [PATCH 1/2] migration: Rate limit inside host pages - -RH-Author: Laurent Vivier -Message-id: <20200317170518.9303-1-lvivier@redhat.com> -Patchwork-id: 94374 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages -Bugzilla: 1814336 -RH-Acked-by: Peter Xu -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -From: "Dr. David Alan Gilbert" - -When using hugepages, rate limiting is necessary within each huge -page, since a 1G huge page can take a significant time to send, so -you end up with bursty behaviour. - -Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") -Reported-by: Lin Ma -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 -BRANCH: rhel-av-8.2.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 -TESTED: Tested that the migration abort doesn't trigger an error message in - the kernel logs on P9 - -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- - migration/migration.h | 1 + - migration/ram.c | 2 ++ - migration/trace-events | 4 ++-- - 4 files changed, 37 insertions(+), 27 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ed18c59..e31d0f5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) - qemu_sem_wait(&migrate_get_current()->rate_limit_sem); - } - -+/* Returns true if the rate limiting was broken by an urgent request */ -+bool migration_rate_limit(void) -+{ -+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ MigrationState *s = migrate_get_current(); -+ -+ bool urgent = false; -+ migration_update_counters(s, now); -+ if (qemu_file_rate_limit(s->to_dst_file)) { -+ /* -+ * Wait for a delay to do rate limiting OR -+ * something urgent to post the semaphore. -+ */ -+ int ms = s->iteration_start_time + BUFFER_DELAY - now; -+ trace_migration_rate_limit_pre(ms); -+ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -+ /* -+ * We were woken by one or more urgent things but -+ * the timedwait will have consumed one of them. -+ * The service routine for the urgent wake will dec -+ * the semaphore itself for each item it consumes, -+ * so add this one we just eat back. -+ */ -+ qemu_sem_post(&s->rate_limit_sem); -+ urgent = true; -+ } -+ trace_migration_rate_limit_post(urgent); -+ } -+ return urgent; -+} -+ - /* - * Master migration thread on the source VM. - * It drives the migration and pumps the data down the outgoing channel. 
-@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) - trace_migration_thread_setup_complete(); - - while (migration_is_active(s)) { -- int64_t current_time; -- - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { - MigIterateState iter_state = migration_iteration_run(s); - if (iter_state == MIG_ITERATE_SKIP) { -@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) - update_iteration_initial_status(s); - } - -- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- -- migration_update_counters(s, current_time); -- -- urgent = false; -- if (qemu_file_rate_limit(s->to_dst_file)) { -- /* Wait for a delay to do rate limiting OR -- * something urgent to post the semaphore. -- */ -- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; -- trace_migration_thread_ratelimit_pre(ms); -- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -- /* We were worken by one or more urgent things but -- * the timedwait will have consumed one of them. -- * The service routine for the urgent wake will dec -- * the semaphore itself for each item it consumes, -- * so add this one we just eat back. 
-- */ -- qemu_sem_post(&s->rate_limit_sem); -- urgent = true; -- } -- trace_migration_thread_ratelimit_post(urgent); -- } -+ urgent = migration_rate_limit(); - } - - trace_migration_thread_after_loop(); -diff --git a/migration/migration.h b/migration/migration.h -index a2b2336..a15e8d8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; - - void migration_make_urgent_request(void); - void migration_consume_urgent_request(void); -+bool migration_rate_limit(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 3891eff..5344c7d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - - pages += tmppages; - pss->page++; -+ /* Allow rate limiting to happen in the middle of huge pages */ -+ migration_rate_limit(); - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - -diff --git a/migration/trace-events b/migration/trace-events -index 6dee7b5..2f9129e 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 - migration_completion_file_err(void) "" - migration_completion_postcopy_end(void) "" - migration_completion_postcopy_end_after_complete(void) "" -+migration_rate_limit_pre(int ms) "%d ms" -+migration_rate_limit_post(int urgent) "urgent: %d" - migration_return_path_end_before(void) "" - migration_return_path_end_after(int rp_error) "%d" - migration_thread_after_loop(void) "" - migration_thread_file_err(void) "" --migration_thread_ratelimit_pre(int ms) "%d ms" --migration_thread_ratelimit_post(int urgent) "urgent: %d" - migration_thread_setup_complete(void) "" - open_return_path_on_source(void) "" - open_return_path_on_source_continue(void) "" --- -1.8.3.1 - diff --git 
a/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/kvm-migration-multifd-clean-pages-after-filling-packet.patch deleted file mode 100644 index 5fa7fde..0000000 --- a/kvm-migration-multifd-clean-pages-after-filling-packet.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:34 +0000 -Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-2-quintela@redhat.com> -Patchwork-id: 94112 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Wei Yang - -This is a preparation for the next patch: - - not use multifd during postcopy. - -Without enabling postcopy, everything looks good. While after enabling -postcopy, migration may fail even not use multifd during postcopy. The -reason is the pages is not properly cleared and *old* target page will -continue to be transferred. - -After clean pages, migration succeeds. - -Signed-off-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 5078f94..65580e3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) - } - qemu_mutex_unlock(&p->mutex); - } -- p->pages->used = 0; -+ assert(!p->pages->used); -+ assert(!p->pages->block); - - p->packet_num = multifd_send_state->packet_num++; -- p->pages->block = NULL; - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) - p->flags = 0; - p->num_packets++; - p->num_pages += used; -+ p->pages->used = 0; -+ p->pages->block = NULL; - qemu_mutex_unlock(&p->mutex); - - trace_multifd_send(p->id, packet_num, used, flags, --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch deleted file mode 100644 index 0c5fe80..0000000 --- a/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:37 +0000 -Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in - terminating multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-5-quintela@redhat.com> -Patchwork-id: 94119 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd will lock all the other multifds' IOChannel mutex to inform them -to quit by setting p->quit or shutting down p->c. 
In this senario, if some -multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup -had destroyed their mutex, it could cause destroyed mutex access when trying -lock their mutex. - -Here is the coredump stack: - #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 - #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 - #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 - #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 - #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 - #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 - #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 - #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 - #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 - #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 - -To fix it up, let's destroy the mutex after all the other multifd threads had -been terminated. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 860f781..6c55c5d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) - if (p->running) { - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDSendParams *p = &multifd_send_state->params[i]; -+ - socket_send_channel_destroy(p->c); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); -@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) - qemu_sem_post(&p->sem_sync); - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDRecvParams *p = &multifd_recv_state->params[i]; -+ - object_unref(OBJECT(p->c)); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch deleted file mode 100644 index 9e9683c..0000000 --- a/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:42 +0000 -Subject: [PATCH 10/18] migration/multifd: fix nullptr access in - multifd_send_terminate_threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-10-quintela@redhat.com> -Patchwork-id: 94117 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the multifd_send_threads is not created when migration is failed, -multifd_save_cleanup would be called twice. In this senario, the -multifd_send_state is accessed after it has been released, the result -is that the source VM is crashing down. 
- -Here is the coredump stack: - Program received signal SIGSEGV, Segmentation fault. - 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; - #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 - #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 - #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 - #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 - #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 - #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 - #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 - #10 0x00005629334c5acf in main_loop () at vl.c:1810 - #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 - -If the multifd_send_threads is not created when migration is failed. -In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. - -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 902c56c..3891eff 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - trace_multifd_new_send_channel_async(p->id); - if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); -- multifd_save_cleanup(); -+ /* Error happen, we need to tell who pay attention to me */ -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ /* -+ * Although multifd_send_thread is not created, but main migration -+ * thread neet to judge whether it is running, so we need to mark -+ * its status. -+ */ -+ p->quit = true; - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch deleted file mode 100644 index e780698..0000000 --- a/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:36 +0000 -Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating - multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-4-quintela@redhat.com> -Patchwork-id: 94110 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd channel will shutdown all the other multifd's IOChannel when it -fails to receive an IOChannel. 
In this senario, if some multifds had not -received its IOChannel yet, it would try to shutdown its IOChannel which could -cause nullptr access at qio_channel_shutdown. - -Here is the coredump stack: - #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 - #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 - #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 - #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 - #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 - #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 - #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 - #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 - #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 - #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 - #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 - #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 - #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 - -To fix it up, let's check p->c before calling qio_channel_shutdown. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 8c783b3..860f781 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) - - normal quit, i.e. everything went fine, just finished - - error quit: We close the channels so the channel threads - finish the qio_channel_read_all_eof() */ -- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ if (p->c) { -+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ } - qemu_mutex_unlock(&p->mutex); - } - } --- -1.8.3.1 - diff --git a/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index c20cb6c..0000000 --- a/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:10 +0000 -Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself - -RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-3-kwolf@redhat.com> -Patchwork-id: 94045 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, when mirror_co_read() waits for free slots, its -MirrorOp is already in s->ops_in_flight, so if not enough slots are -immediately available, an operation can end up waiting for itself to -complete, which results in a hang. - -Fix this by passing the current MirrorOp and skipping this operation -when picking an operation to wait for. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..cacbc70 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -+ if (self == op) { -+ continue; -+ } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, false); -+ mirror_wait_for_any_operation(s, self, false); - } - - /* Perform a mirror copy operation. 
-@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, op); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, pseudo_op); - } - - if (s->ret < 0) { -@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - } - } - -@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } - -@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, true); -+ mirror_wait_for_any_operation(s, NULL, true); - } - - if (s->ret < 0) { -@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch deleted file mode 100644 index 67f3e54..0000000 --- a/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:09 +0000 -Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability - -RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-2-kwolf@redhat.com> -Patchwork-id: 94044 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -If a coroutine is launched, but the coroutine pointer isn't stored -anywhere, debugging any problems inside the coroutine is quite hard. -Let's store the coroutine pointer of a mirror operation in MirrorOp to -have it available in the debugger. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/block/mirror.c b/block/mirror.c -index f0f2d9d..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -103,6 +103,7 @@ struct MirrorOp { - bool is_pseudo_op; - bool is_active_write; - CoQueue waiting_requests; -+ Coroutine *co; - - QTAILQ_ENTRY(MirrorOp) next; - }; -@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - default: - abort(); - } -+ op->co = co; - - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); - qemu_coroutine_enter(co); --- -1.8.3.1 - diff --git a/kvm-mirror-Wait-only-for-in-flight-operations.patch b/kvm-mirror-Wait-only-for-in-flight-operations.patch deleted file mode 100644 index a06d30e..0000000 --- a/kvm-mirror-Wait-only-for-in-flight-operations.patch +++ /dev/null @@ -1,95 +0,0 @@ -From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:24 +0100 -Subject: [PATCH 4/4] mirror: Wait only for in-flight operations - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-3-kwolf@redhat.com> -Patchwork-id: 94463 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations -Bugzilla: 1794692 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, a MirrorOp is already in s->ops_in_flight when -mirror_co_read() waits for free slots, so if not enough slots are -immediately available, an operation can end up waiting for itself, or -two or more operations can wait for each other to complete, which -results in a hang. - -Fix this by adding a flag to MirrorOp that tells us if the request is -already in flight (and therefore occupies slots that it will later -free), and picking only such operations for waiting. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..5e5a521 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -102,6 +102,7 @@ struct MirrorOp { - - bool is_pseudo_op; - bool is_active_write; -+ bool is_in_flight; - CoQueue waiting_requests; - Coroutine *co; - -@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - * caller of this function. Since there is only one pseudo op - * at any given time, we will always find some real operation - * to wait on. */ -- if (!op->is_pseudo_op && op->is_active_write == active) { -+ if (!op->is_pseudo_op && op->is_in_flight && -+ op->is_active_write == active) -+ { - qemu_co_queue_wait(&op->waiting_requests, NULL); - return; - } -@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - /* Copy the dirty cluster. */ - s->in_flight++; - s->bytes_in_flight += op->bytes; -+ op->is_in_flight = true; - trace_mirror_one_iteration(s, op->offset, op->bytes); - - ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, -@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); -@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); - mirror_write_complete(op, ret); -@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, - .offset = offset, - .bytes = bytes, - .is_active_write = true, -+ .is_in_flight = true, - }; - qemu_co_queue_init(&op->waiting_requests); - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); --- -1.8.3.1 - diff --git a/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch deleted file mode 100644 index bca0b4c..0000000 --- a/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:38 +0000 -Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an - error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-6-quintela@redhat.com> -Patchwork-id: 94118 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 22 +++++++++++++--------- - 1 file changed, 13 insertions(+), 9 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 6c55c5d..a0257ee 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - { - RAMState **temp = opaque; - RAMState *rs = *temp; -- int ret; -+ int ret = 0; - int i; - int64_t t0; - int done = 0; -@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -- ram_counters.transferred += 8; -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ ram_counters.transferred += 8; - -- ret = qemu_file_get_error(f); -+ ret = qemu_file_get_error(f); -+ } - if (ret < 0) { - return ret; - } -@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - } - -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ } - - return ret; - } --- -1.8.3.1 - diff --git a/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch deleted file mode 100644 index 2dbdb16..0000000 --- a/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:11 +0000 -Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on - reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: 
<20200121051613.388295-2-dgibson@redhat.com> -Patchwork-id: 93429 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -When a CPU is reset, QEMU makes sure no interrupt is pending by clearing -CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a -complete machine emulation, eg. a sPAPR machine, an external interrupt -request could still be pending in KVM though, eg. an IPI. It will be -eventually presented to the guest, which is supposed to acknowledge it at -the interrupt controller. If the interrupt controller is emulated in QEMU, -either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt -pin in KVM since it isn't pending anymore for QEMU. When the vCPU re-enters -the guest, the interrupt request is still pending and the vCPU will try -again to acknowledge it. This causes an infinite loop and eventually hangs -the guest. - -The code has been broken since the beginning. The issue wasn't hit before -because accel=kvm,kernel-irqchip=off is an awkward setup that never got -used until recently with the LC92x IBM systems (aka, Boston). - -Add a ppc_irq_reset() function to do the necessary cleanup, ie. deassert -the IRQ pins of the CPU in QEMU and most importantly the external interrupt -pin for this vCPU in KVM. - -Reported-by: Satheesh Rajendran -Signed-off-by: Greg Kurz -Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 8 ++++++++ - include/hw/ppc/ppc.h | 2 ++ - target/ppc/translate_init.inc.c | 1 + - 3 files changed, 11 insertions(+) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index 52a18eb..d554b64 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) - - return NULL; - } -+ -+void ppc_irq_reset(PowerPCCPU *cpu) -+{ -+ CPUPPCState *env = &cpu->env; -+ -+ env->irq_input_state = 0; -+ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); -+} -diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h -index 4bdcb8b..5dd7531 100644 ---- a/include/hw/ppc/ppc.h -+++ b/include/hw/ppc/ppc.h -@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} - static inline void ppce500_irq_init(PowerPCCPU *cpu) {} -+static inline void ppc_irq_reset(PowerPCCPU *cpu) {} - #else - void ppc40x_irq_init(PowerPCCPU *cpu); - void ppce500_irq_init(PowerPCCPU *cpu); -@@ -83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); - void ppc970_irq_init(PowerPCCPU *cpu); - void ppcPOWER7_irq_init(PowerPCCPU *cpu); - void ppcPOWER9_irq_init(PowerPCCPU *cpu); -+void ppc_irq_reset(PowerPCCPU *cpu); - #endif - - /* PPC machines for OpenBIOS */ -diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c -index ba726de..64a8380 100644 ---- a/target/ppc/translate_init.inc.c -+++ b/target/ppc/translate_init.inc.c -@@ -10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) - env->pending_interrupts = 0; - s->exception_index = POWERPC_EXCP_NONE; - env->error_code = 0; -+ ppc_irq_reset(cpu); - - /* tininess for underflow is detected before rounding */ - set_float_detect_tininess(float_tininess_before_rounding, --- -1.8.3.1 - diff --git a/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch deleted file mode 
100644 index 457d149..0000000 --- a/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +++ /dev/null @@ -1,112 +0,0 @@ -From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:13 +0000 -Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern - Book3s CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-4-dgibson@redhat.com> -Patchwork-id: 93431 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The power7_set_irq() and power9_set_irq() functions set this but it is -never used actually. Modern Book3s compatible CPUs are only supported -by the pnv and spapr machines. They have an interrupt controller, XICS -for POWER7/8 and XIVE for POWER9, whose models don't require to track -IRQ input states at the CPU level. - -Drop these lines to avoid confusion. - -Signed-off-by: Greg Kurz -Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 16 ++-------------- - target/ppc/cpu.h | 4 +++- - 2 files changed, 5 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index d554b64..730a41f 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) - static void power7_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER7_INPUT_INT: -@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER7_irq_init(PowerPCCPU *cpu) -@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) - static void power9_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER9_INPUT_INT: -@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER9_irq_init(PowerPCCPU *cpu) -diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 5c53801..8887f76 100644 ---- a/target/ppc/cpu.h -+++ b/target/ppc/cpu.h -@@ -1090,7 +1090,9 @@ struct CPUPPCState { - #if !defined(CONFIG_USER_ONLY) - /* - * This is the IRQ controller, which is implementation dependent -- * and only relevant when emulating a complete machine. -+ * and only relevant when emulating a complete machine. 
Note that -+ * this isn't used by recent Book3s compatible CPUs (POWER7 and -+ * newer). - */ - uint32_t irq_input_state; - void **irq_inputs; --- -1.8.3.1 - diff --git a/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch deleted file mode 100644 index 9c25b76..0000000 --- a/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:39 +0000 -Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for - 'blockdev-snapshot' - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-14-kwolf@redhat.com> -Patchwork-id: 94290 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Anounce that 'blockdev-snapshot' command's permissions allow changing -of the backing file if the 'consistent_read' permission is not required. - -This is useful for libvirt to allow late opening of the backing chain -during a blockdev-mirror. - -Signed-off-by: Peter Krempa -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-8-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index a1e85b0..a64ad81 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1541,6 +1541,12 @@ - # - # For the arguments, see the documentation of BlockdevSnapshot. 
- # -+# Features: -+# @allow-write-only-overlay: If present, the check whether this operation is safe -+# was relaxed so that it can be used to change -+# backing file of a destination of a blockdev-mirror. -+# (since 5.0) -+# - # Since: 2.5 - # - # Example: -@@ -1561,7 +1567,8 @@ - # - ## - { 'command': 'blockdev-snapshot', -- 'data': 'BlockdevSnapshot' } -+ 'data': 'BlockdevSnapshot', -+ 'features': [ 'allow-write-only-overlay' ] } - - ## - # @change-backing-file: --- -1.8.3.1 - diff --git a/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch deleted file mode 100644 index 1a7ace5..0000000 --- a/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +++ /dev/null @@ -1,52 +0,0 @@ -From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:13:07 +0000 -Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data - file - -RH-Author: Kevin Wolf -Message-id: <20200224161307.29783-2-kwolf@redhat.com> -Patchwork-id: 94042 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file -Bugzilla: 1703907 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -For external data file, cluster allocations return an offset in the data -file and are not refcounted. In this case, there is nothing to do for -qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file -is wrong and causes crashes in the better case or image corruption in -the worse case. - -Signed-off-by: Kevin Wolf -Message-Id: <20200211094900.17315-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 8982b7b..dc3c270 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1015,8 +1015,11 @@ err: - void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) - { - BDRVQcow2State *s = bs->opaque; -- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, -- QCOW2_DISCARD_NEVER); -+ if (!has_data_file(bs)) { -+ qcow2_free_clusters(bs, m->alloc_offset, -+ m->nb_clusters << s->cluster_bits, -+ QCOW2_DISCARD_NEVER); -+ } - } - - /* --- -1.8.3.1 - diff --git a/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch deleted file mode 100644 index 88a6e31..0000000 --- a/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +++ /dev/null @@ -1,92 +0,0 @@ -From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:39 +0000 -Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-7-quintela@redhat.com> -Patchwork-id: 94116 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Be sure that we are not doing neither read/write after shutdown of the -QEMUFile. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/qemu-file.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 26fb25d..bbb2b63 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -53,6 +53,8 @@ struct QEMUFile { - - int last_error; - Error *last_error_obj; -+ /* has the file has been shutdown */ -+ bool shutdown; - }; - - /* -@@ -61,10 +63,18 @@ struct QEMUFile { - */ - int qemu_file_shutdown(QEMUFile *f) - { -+ int ret; -+ -+ f->shutdown = true; - if (!f->ops->shut_down) { - return -ENOSYS; - } -- return f->ops->shut_down(f->opaque, true, true, NULL); -+ ret = f->ops->shut_down(f->opaque, true, true, NULL); -+ -+ if (!f->last_error) { -+ qemu_file_set_error(f, -EIO); -+ } -+ return ret; - } - - /* -@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) - return; - } - -+ if (f->shutdown) { -+ return; -+ } - if (f->iovcnt > 0) { - expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, -@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) - f->buf_index = 0; - f->buf_size = pending; - -+ if (f->shutdown) { -+ return 0; -+ } -+ - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending, &local_error); - if (len > 0) { -@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) - - int qemu_file_rate_limit(QEMUFile *f) - { -+ if (f->shutdown) { -+ return 1; -+ } - if (qemu_file_get_error(f)) { - return 1; - } --- -1.8.3.1 - diff --git a/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/kvm-replication-assert-we-own-context-before-job_cancel_.patch deleted file mode 100644 index 09ef4de..0000000 --- a/kvm-replication-assert-we-own-context-before-job_cancel_.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:13 +0100 -Subject: [PATCH 2/6] replication: assert we own context before 
job_cancel_sync - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-3-kwolf@redhat.com> -Patchwork-id: 94595 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -job_cancel_sync requires the job's lock to be held, all other callers -already do this (replication_stop, drive_backup_abort, -blockdev_backup_abort, job_cancel_sync_all, cancel_common). - -In this case we're in a BlockDriver handler, so we already have a lock, -just assert that it is the same as the one used for the commit_job. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/replication.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/block/replication.c b/block/replication.c -index 99532ce..0ce27ee 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -144,12 +144,15 @@ fail: - static void replication_close(BlockDriverState *bs) - { - BDRVReplicationState *s = bs->opaque; -+ Job *commit_job; - - if (s->stage == BLOCK_REPLICATION_RUNNING) { - replication_stop(s->rs, false, NULL); - } - if (s->stage == BLOCK_REPLICATION_FAILOVER) { -- job_cancel_sync(&s->commit_job->job); -+ commit_job = &s->commit_job->job; -+ assert(commit_job->aio_context == qemu_get_current_aio_context()); -+ job_cancel_sync(commit_job); - } - - if (s->mode == REPLICATION_MODE_SECONDARY) { --- -1.8.3.1 - diff --git a/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch deleted file mode 100644 index 6d8dfe1..0000000 --- a/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 
0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:57 +0100 -Subject: [PATCH 2/4] slirp: use correct size while emulating IRC commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> -Patchwork-id: 93400 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size -'m->m_size' to write DCC commands via snprintf(3). This may -lead to OOB write access, because 'bptr' points somewhere in -the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) -size to avoid OOB access. - -Reported-by: Vishnu Dev TJ -Signed-off-by: Prasad J Pandit -Reviewed-by: Samuel Thibault -Message-Id: <20200109094228.79764-2-ppandit@redhat.com> - -(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cbecd64..cedbfb2 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -@@ -788,8 +789,8 @@ int tcp_emu(struct 
socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } --- -1.8.3.1 - diff --git a/kvm-slirp-use-correct-size-while-emulating-commands.patch b/kvm-slirp-use-correct-size-while-emulating-commands.patch deleted file mode 100644 index fe42f4f..0000000 --- a/kvm-slirp-use-correct-size-while-emulating-commands.patch +++ /dev/null @@ -1,71 +0,0 @@ -From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:58 +0100 -Subject: [PATCH 3/4] slirp: use correct size while emulating commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> -Patchwork-id: 93401 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating services in tcp_emu(), it uses 'mbuf' size -'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) -size to avoid possible OOB access. 
- -Signed-off-by: Prasad J Pandit -Signed-off-by: Samuel Thibault -Message-Id: <20200109094228.79764-3-ppandit@redhat.com> - -(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cedbfb2..954d1a6 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, - n5, n6, x == 7 ? buff : ""); - return 1; -@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - -@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = -- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = snprintf(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)) + 1; - return 1; - - case EMU_IRC: --- -1.8.3.1 - diff --git a/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch deleted file mode 100644 index d934712..0000000 --- a/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +++ /dev/null @@ -1,113 +0,0 @@ -From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 3 Jan 2020 01:15:12 +0000 -Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode - changeover -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200103011512.49129-2-dgibson@redhat.com> -Patchwork-id: 93261 -O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover -Bugzilla: 1733893 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: David Gibson - -PAPR allows the interrupt controller used on a POWER9 machine (XICS or -XIVE) to be selected by the guest operating system, by using the -ibm,client-architecture-support (CAS) feature negotiation call. - -Currently, if the guest selects an interrupt controller different from the -one selected at initial boot, this causes the system to be reset with the -new model and the boot starts again. This means we run through the SLOF -boot process twice, as well as any other bootloader (e.g. grub) in use -before the OS calls CAS. This can be confusing and/or inconvenient for -users. 
- -Thanks to two fairly recent changes, we no longer need this reboot. 1) we -now completely regenerate the device tree when CAS is called (meaning we -don't need special case updates for all the device tree changes caused by -the interrupt controller mode change), 2) we now have explicit code paths -to activate and deactivate the different interrupt controllers, rather than -just implicitly calling those at machine reset time. - -We can therefore eliminate the reboot for changing irq mode, simply by -putting a call to spapr_irq_update_active_intc() before we call -spapr_h_cas_compose_response() (which gives the updated device tree to -the guest firmware and OS). - -Signed-off-by: David Gibson -Reviewed-by: Cedric Le Goater -Reviewed-by: Greg Kurz -(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- - 1 file changed, 13 insertions(+), 20 deletions(-) - -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 140f05c..05a7ca2 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); - spapr_ovec_cleanup(ov1_guest); -- if (!spapr->cas_reboot) { -- /* If spapr_machine_reset() did not set up a HPT but one is necessary -- * (because the guest isn't going to use radix) then set it up here. */ -- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -- /* legacy hash or new hash: */ -- spapr_setup_hpt_and_vrma(spapr); -- } -- spapr->cas_reboot = -- (spapr_h_cas_compose_response(spapr, args[1], args[2], -- ov5_updates) != 0); -- } - - /* -- * Ensure the guest asks for an interrupt mode we support; otherwise -- * terminate the boot. 
-+ * Ensure the guest asks for an interrupt mode we support; -+ * otherwise terminate the boot. - */ - if (guest_xive) { - if (!spapr->irq->xive) { -@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - } - -- /* -- * Generate a machine reset when we have an update of the -- * interrupt mode. Only required when the machine supports both -- * modes. -- */ -+ spapr_irq_update_active_intc(spapr); -+ - if (!spapr->cas_reboot) { -- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) -- && spapr->irq->xics && spapr->irq->xive; -+ /* If spapr_machine_reset() did not set up a HPT but one is necessary -+ * (because the guest isn't going to use radix) then set it up here. */ -+ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -+ /* legacy hash or new hash: */ -+ spapr_setup_hpt_and_vrma(spapr); -+ } -+ spapr->cas_reboot = -+ (spapr_h_cas_compose_response(spapr, args[1], args[2], -+ ov5_updates) != 0); - } - - spapr_ovec_cleanup(ov5_updates); --- -1.8.3.1 - diff --git a/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch deleted file mode 100644 index 0aa782b..0000000 --- a/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +++ /dev/null @@ -1,135 +0,0 @@ -From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 7 Feb 2020 00:57:04 +0000 -Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in - pseries-5.0 machine - -RH-Author: David Gibson -Message-id: <20200207005704.194428-1-dgibson@redhat.com> -Patchwork-id: 93737 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine -Bugzilla: 1796240 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: David Gibson - -For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch -mitigation is "count cache disabled", 
which is configured with: - -machine cap-ibs=fixed-ccd -However, this option isn't available on DD2.3 CPUs with KVM, because they -don't have the count cache disabled. - -For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured -with: - -machine cap-ibs=workaround,cap-ccf-assist=on -However this option isn't available on DD2.2 CPUs with KVM, because they -don't have the special CCF assist instruction this relies on. - -On current machine types, we default to "count cache flush w/o assist", -that is: - -machine cap-ibs=workaround,cap-ccf-assist=off -This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a -fairly significant performance impact. - -It turns out we can do better. The special instruction that CCF assist -uses to trigger a count cache flush is a no-op on earlier CPUs, rather than -trapping or causing other badness. It doesn't, of itself, implement the -mitigation, but *if* we have count-cache-disabled, then the count cache -flush is unnecessary, and so using the count cache flush mitigation is -harmless. - -Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by -default. Along with that, suppress throwing an error if cap-ccf-assist -is selected but KVM doesn't support it, as long as KVM *is* giving us -count-cache-disabled. To allow TCG to work out of the box, even though it -doesn't implement the ccf flush assist, downgrade the error in that case to -a warning. This matches several Spectre mitigations where we allow TCG -to operate for debugging, since we don't really make guarantees about TCG -security properties anyway. - -While we're there, make the TCG warning for this case match that for other -mitigations. - -Signed-off-by: David Gibson -Tested-by: Michael Ellerman -(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - hw/ppc/spapr.c - -Adjusted machine version compatibility code to the RHEL machine types -rather than the upstream machine types. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 -Branch: rhel-av-8.2.0 -Upstream: Merged for qemu-5.0 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 4 +++- - hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- - 2 files changed, 20 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index c12862d..a330f03 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ - smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; - smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; -- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; - spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_dual; - smc->dr_phb_enabled = true; -@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) - hw_compat_rhel_8_1_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - -+ /* from pseries-4.2 */ -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; - } - - DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 805f385..6e6fb28 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, - uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); - - if (tcg_enabled() && val) { -- /* TODO - for now only allow broken for TCG */ -- error_setg(errp, --"Requested count cache flush assist capability level 
not supported by tcg," -- " try appending -machine cap-ccf-assist=off"); -+ /* TCG doesn't implement anything here, but allow with a warning */ -+ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); - } else if (kvm_enabled() && (val > kvm_val)) { -+ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); -+ -+ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { -+ /* -+ * If we don't have CCF assist on the host, the assist -+ * instruction is a harmless no-op. It won't correctly -+ * implement the cache count flush *but* if we have -+ * count-cache-disabled in the host, that flush is -+ * unnnecessary. So, specifically allow this case. This -+ * allows us to have better performance on POWER9 DD2.3, -+ * while still working on POWER9 DD2.2 and POWER8 host -+ * cpus. -+ */ -+ return; -+ } - error_setg(errp, - "Requested count cache flush assist capability level not supported by kvm," - " try appending -machine cap-ccf-assist=off"); --- -1.8.3.1 - diff --git a/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/kvm-target-arm-arch_dump-Add-SVE-notes.patch deleted file mode 100644 index febea10..0000000 --- a/kvm-target-arm-arch_dump-Add-SVE-notes.patch +++ /dev/null @@ -1,298 +0,0 @@ -From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 24 Jan 2020 09:14:34 +0100 -Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes - -RH-Author: Andrew Jones -Message-id: <20200124091434.15021-2-drjones@redhat.com> -Patchwork-id: 93443 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes -Bugzilla: 1725084 -RH-Acked-by: Auger Eric -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 - -Author: Andrew Jones -Date: Thu, 23 Jan 2020 15:22:40 +0000 - - target/arm/arch_dump: Add SVE notes - - When dumping a guest with dump-guest-memory also dump the SVE - registers if they are in use. 
- - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101832.18781-1-drjones@redhat.com - [PMM: fixed checkpatch nits] - Signed-off-by: Peter Maydell - -(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) -Signed-off-by: Miroslav Rezanina ---- - include/elf.h | 1 + - target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- - target/arm/cpu.h | 25 ++++++++++ - target/arm/kvm64.c | 24 ---------- - 4 files changed, 148 insertions(+), 26 deletions(-) - -diff --git a/include/elf.h b/include/elf.h -index 3501e0c..8fbfe60 100644 ---- a/include/elf.h -+++ b/include/elf.h -@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { - #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ - #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ - #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ -+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ - - /* - * Physical entry point into the kernel. 
-diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c -index 26a2c09..2345dec 100644 ---- a/target/arm/arch_dump.c -+++ b/target/arm/arch_dump.c -@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { - - QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); - -+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ -+struct aarch64_user_sve_header { -+ uint32_t size; -+ uint32_t max_size; -+ uint16_t vl; -+ uint16_t max_vl; -+ uint16_t flags; -+ uint16_t reserved; -+} QEMU_PACKED; -+ - struct aarch64_note { - Elf64_Nhdr hdr; - char name[8]; /* align_up(sizeof("CORE"), 4) */ - union { - struct aarch64_elf_prstatus prstatus; - struct aarch64_user_vfp_state vfp; -+ struct aarch64_user_sve_header sve; - }; - } QEMU_PACKED; - -@@ -76,6 +87,8 @@ struct aarch64_note { - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) - #define AARCH64_PRFPREG_NOTE_SIZE \ - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) -+#define AARCH64_SVE_NOTE_SIZE(env) \ -+ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) - - static void aarch64_note_init(struct aarch64_note *note, DumpState *s, - const char *name, Elf64_Word namesz, -@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, - return 0; - } - -+#ifdef TARGET_AARCH64 -+static off_t sve_zreg_offset(uint32_t vq, int n) -+{ -+ off_t off = sizeof(struct aarch64_user_sve_header); -+ return ROUND_UP(off, 16) + vq * 16 * n; -+} -+ -+static off_t sve_preg_offset(uint32_t vq, int n) -+{ -+ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; -+} -+ -+static off_t sve_fpsr_offset(uint32_t vq) -+{ -+ off_t off = sve_preg_offset(vq, 17); -+ return ROUND_UP(off, 16); -+} -+ -+static off_t sve_fpcr_offset(uint32_t vq) -+{ -+ return sve_fpsr_offset(vq) + sizeof(uint32_t); -+} -+ -+static uint32_t sve_current_vq(CPUARMState *env) -+{ -+ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; -+} -+ -+static size_t sve_size_vq(uint32_t vq) -+{ -+ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); -+ return ROUND_UP(off, 16); -+} -+ -+static size_t sve_size(CPUARMState *env) -+{ -+ return sve_size_vq(sve_current_vq(env)); -+} -+ -+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, -+ CPUARMState *env, int cpuid, -+ DumpState *s) -+{ -+ struct aarch64_note *note; -+ ARMCPU *cpu = env_archcpu(env); -+ uint32_t vq = sve_current_vq(env); -+ uint64_t tmp[ARM_MAX_VQ * 2], *r; -+ uint32_t fpr; -+ uint8_t *buf; -+ int ret, i; -+ -+ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); -+ buf = (uint8_t *)¬e->sve; -+ -+ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); -+ -+ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); -+ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); -+ note->sve.vl = cpu_to_dump16(s, vq * 16); -+ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); -+ note->sve.flags = cpu_to_dump16(s, 1); -+ -+ for (i = 0; i < 32; ++i) { -+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); -+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); -+ } -+ -+ for (i = 0; i < 17; ++i) { -+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], -+ DIV_ROUND_UP(vq * 2, 8)); -+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); -+ } -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); -+ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); -+ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); -+ g_free(note); -+ -+ if (ret < 0) { -+ return -1; -+ } -+ -+ return 0; -+} -+#endif -+ - int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) - { - struct aarch64_note note; -- CPUARMState *env = &ARM_CPU(cs)->env; -+ ARMCPU *cpu = ARM_CPU(cs); -+ CPUARMState *env = &cpu->env; - DumpState *s = opaque; - uint64_t pstate, sp; - int ret, i; -@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - return -1; 
- } - -- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ if (ret) { -+ return ret; -+ } -+ -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ ret = aarch64_write_elf64_sve(f, env, cpuid, s); -+ } -+#endif -+ -+ return ret; - } - - /* struct pt_regs from arch/arm/include/asm/ptrace.h */ -@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) - if (class == ELFCLASS64) { - note_size = AARCH64_PRSTATUS_NOTE_SIZE; - note_size += AARCH64_PRFPREG_NOTE_SIZE; -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ note_size += AARCH64_SVE_NOTE_SIZE(env); -+ } -+#endif - } else { - note_size = ARM_PRSTATUS_NOTE_SIZE; - if (arm_feature(env, ARM_FEATURE_VFP)) { -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 83a809d..82dd3cc 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); - void aarch64_sve_change_el(CPUARMState *env, int old_el, - int new_el, bool el0_a64); - void aarch64_add_sve_properties(Object *obj); -+ -+/* -+ * SVE registers are encoded in KVM's memory in an endianness-invariant format. -+ * The byte at offset i from the start of the in-memory representation contains -+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -+ * lowest offsets are stored in the lowest memory addresses, then that nearly -+ * matches QEMU's representation, which is to use an array of host-endian -+ * uint64_t's, where the lower offsets are at the lower indices. To complete -+ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
-+ */ -+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) -+{ -+#ifdef HOST_WORDS_BIGENDIAN -+ int i; -+ -+ for (i = 0; i < nr; ++i) { -+ dst[i] = bswap64(src[i]); -+ } -+ -+ return dst; -+#else -+ return src; -+#endif -+} -+ - #else - static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } - static inline void aarch64_sve_change_el(CPUARMState *env, int o, -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 876184b..e2da756 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) - } - - /* -- * SVE registers are encoded in KVM's memory in an endianness-invariant format. -- * The byte at offset i from the start of the in-memory representation contains -- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -- * lowest offsets are stored in the lowest memory addresses, then that nearly -- * matches QEMU's representation, which is to use an array of host-endian -- * uint64_t's, where the lower offsets are at the lower indices. To complete -- * the translation we just need to byte swap the uint64_t's on big-endian hosts. -- */ --static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) --{ --#ifdef HOST_WORDS_BIGENDIAN -- int i; -- -- for (i = 0; i < nr; ++i) { -- dst[i] = bswap64(src[i]); -- } -- -- return dst; --#else -- return src; --#endif --} -- --/* - * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits - * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard - * code the slice index to zero for now as it's unlikely we'll need more than --- -1.8.3.1 - diff --git a/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch deleted file mode 100644 index 601b8c4..0000000 --- a/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:14 +0000 -Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-6-drjones@redhat.com> -Patchwork-id: 93623 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/cpu: Add the kvm-no-adjvtime CPU property - - kvm-no-adjvtime is a KVM specific CPU property and a first of its - kind. To accommodate it we also add kvm_arm_add_vcpu_properties() - and a KVM specific CPU properties description to the CPU features - document. - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-7-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - Dropped the second hunk of the hw/arm/virt.c changes - as they would patch dead code. - -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- - hw/arm/virt.c | 5 +++++ - include/hw/arm/virt.h | 1 + - target/arm/cpu.c | 2 ++ - target/arm/cpu64.c | 1 + - target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ - target/arm/kvm_arm.h | 11 +++++++++++ - target/arm/monitor.c | 1 + - tests/arm-cpu-features.c | 4 ++++ - 9 files changed, 89 insertions(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 1b367e2..45d1eb6 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain - configurations. For example, the `aarch64` CPU feature, which, when - disabled, enables the optional AArch32 CPU feature, is only supported - when using the KVM accelerator and when running on a host CPU type that --supports the feature. -+supports the feature. While `aarch64` currently only works with KVM, -+it could work with TCG. CPU features that are specific to KVM are -+prefixed with "kvm-" and are described in "KVM VCPU Features". - - CPU Feature Probing - =================== -@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU - properties have special semantics (see "SVE CPU Property Parsing - Semantics"). - -+KVM VCPU Features -+================= -+ -+KVM VCPU features are CPU features that are specific to KVM, such as -+paravirt features or features that enable CPU virtualization extensions. -+The features' CPU properties are only available when KVM is enabled and -+are named with the prefix "kvm-". KVM VCPU features may be probed, -+enabled, and disabled in the same way as other CPU features. Below is -+the list of KVM VCPU features and their descriptions. -+ -+ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This -+ means that by default the virtual time -+ adjustment is enabled (vtime is *not not* -+ adjusted). 
-+ -+ When virtual time adjustment is enabled each -+ time the VM transitions back to running state -+ the VCPU's virtual counter is updated to ensure -+ stopped time is not counted. This avoids time -+ jumps surprising guest OSes and applications, -+ as long as they use the virtual counter for -+ timekeeping. However it has the side effect of -+ the virtual and physical counters diverging. -+ All timekeeping based on the virtual counter -+ will appear to lag behind any timekeeping that -+ does not subtract VM stopped time. The guest -+ may resynchronize its virtual counter with -+ other time sources as needed. -+ -+ Enable kvm-no-adjvtime to disable virtual time -+ adjustment, also restoring the legacy (pre-5.0) -+ behavior. -+ - SVE CPU Properties - ================== - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e108391..d30d38c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) - } - } - -+ if (vmc->kvm_no_adjvtime && -+ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -+ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -+ } -+ - if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { - object_property_set_bool(cpuobj, false, "pmu", NULL); - } -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 53fdf16..77828ce 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -109,6 +109,7 @@ typedef struct { - bool smbios_old_sys_ver; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ -+ bool kvm_no_adjvtime; - } VirtMachineClass; - - typedef struct { -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 3788fc3..e46efe9 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - cortex_a15_initfn(obj); - -@@ 
-2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - aarch64_add_sve_properties(obj); - } -+ kvm_arm_add_vcpu_properties(obj); - arm_cpu_post_init(obj); - } - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index a39d6fc..3cd416d 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - uint64_t t; - uint32_t u; -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 26d7f8b..4be9497 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -17,6 +17,8 @@ - #include "qemu/timer.h" - #include "qemu/error-report.h" - #include "qemu/main-loop.h" -+#include "qom/object.h" -+#include "qapi/error.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" -@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - env->features = arm_host_cpu_features.features; - } - -+static bool kvm_no_adjvtime_get(Object *obj, Error **errp) -+{ -+ return !ARM_CPU(obj)->kvm_adjvtime; -+} -+ -+static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) -+{ -+ ARM_CPU(obj)->kvm_adjvtime = !value; -+} -+ -+/* KVM VCPU properties should be prefixed with "kvm-". */ -+void kvm_arm_add_vcpu_properties(Object *obj) -+{ -+ if (!kvm_enabled()) { -+ return; -+ } -+ -+ ARM_CPU(obj)->kvm_adjvtime = true; -+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, -+ kvm_no_adjvtime_set, &error_abort); -+ object_property_set_description(obj, "kvm-no-adjvtime", -+ "Set on to disable the adjustment of " -+ "the virtual counter. 
VM stopped time " -+ "will be counted.", &error_abort); -+} -+ - bool kvm_arm_pmu_supported(CPUState *cpu) - { - KVMState *s = KVM_STATE(current_machine->accelerator); -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 01a9a18..ae9e075 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); - void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - - /** -+ * kvm_arm_add_vcpu_properties: -+ * @obj: The CPU object to add the properties to -+ * -+ * Add all KVM specific CPU properties to the CPU object. These -+ * are the CPU properties with "kvm-" prefixed names. -+ */ -+void kvm_arm_add_vcpu_properties(Object *obj); -+ -+/** - * kvm_arm_aarch32_supported: - * @cs: CPUState - * -@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - cpu->host_cpu_probe_failed = true; - } - -+static inline void kvm_arm_add_vcpu_properties(Object *obj) {} -+ - static inline bool kvm_arm_aarch32_supported(CPUState *cs) - { - return false; -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index fa054f8..9725dff 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { - "sve128", "sve256", "sve384", "sve512", - "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", - "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", -+ "kvm-no-adjvtime", - NULL - }; - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 89285ca..ba1a6fe 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - -+ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - 
assert_has_feature_enabled(qts, "max", "aarch64"); - assert_has_feature_enabled(qts, "max", "sve"); -@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - return; - } - -+ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - bool kvm_supports_sve; - char max_name[8], name[8]; --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch deleted file mode 100644 index 3396a32..0000000 --- a/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:13 +0000 -Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-5-drjones@redhat.com> -Patchwork-id: 93622 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm: Implement virtual time adjustment - - When a VM is stopped (such as when it's paused) guest virtual time - should stop counting. Otherwise, when the VM is resumed it will - experience time jumps and its kernel may report soft lockups. Not - counting virtual time while the VM is stopped has the side effect - of making the guest's time appear to lag when compared with real - time, and even with time derived from the physical counter. 
For - this reason, this change, which is enabled by default, comes with - a KVM CPU feature allowing it to be disabled, restoring legacy - behavior. - - This patch only provides the implementation of the virtual time - adjustment. A subsequent patch will provide the CPU property - allowing the change to be enabled and disabled. - - Reported-by: Bijan Mottahedeh - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-6-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/cpu.h | 7 ++++ - target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - target/arm/kvm32.c | 3 ++ - target/arm/kvm64.c | 3 ++ - target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ - target/arm/machine.c | 7 ++++ - 6 files changed, 150 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 82dd3cc..fbd8ea0 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -821,6 +821,13 @@ struct ARMCPU { - /* KVM init features for this CPU */ - uint32_t kvm_init_features[7]; - -+ /* KVM CPU state */ -+ -+ /* KVM virtual time adjustment */ -+ bool kvm_adjvtime; -+ bool kvm_vtime_dirty; -+ uint64_t kvm_vtime; -+ - /* Uniprocessor system with MP extensions */ - bool mp_is_up; - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 5b82cef..26d7f8b 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) - return 0; - } - -+/* -+ * cpreg_values are sorted in ascending order by KVM register ID -+ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find -+ * the storage for a KVM register by ID with a binary search. 
-+ */ -+static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) -+{ -+ uint64_t *res; -+ -+ res = bsearch(®idx, cpu->cpreg_indexes, cpu->cpreg_array_len, -+ sizeof(uint64_t), compare_u64); -+ assert(res); -+ -+ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; -+} -+ - /* Initialize the ARMCPU cpreg list according to the kernel's - * definition of what CPU registers it knows about (and throw away - * the previous TCG-created cpreg list). -@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) - return ok; - } - -+void kvm_arm_cpu_pre_save(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_vtime_dirty) { -+ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; -+ } -+} -+ -+void kvm_arm_cpu_post_load(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_adjvtime) { -+ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); -+ cpu->kvm_vtime_dirty = true; -+ } -+} -+ - void kvm_arm_reset_vcpu(ARMCPU *cpu) - { - int ret; -@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) - return 0; - } - -+void kvm_arm_get_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); -+ if (ret) { -+ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = true; -+} -+ -+void kvm_arm_put_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (!cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); -+ if (ret) { -+ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = false; -+} -+ - int 
kvm_put_vcpu_events(ARMCPU *cpu) - { - CPUARMState *env = &cpu->env; -@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) - return MEMTXATTRS_UNSPECIFIED; - } - -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state) -+{ -+ CPUState *cs = opaque; -+ ARMCPU *cpu = ARM_CPU(cs); -+ -+ if (running) { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_put_virtual_time(cs); -+ } -+ } else { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_get_virtual_time(cs); -+ } -+ } -+} - - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) - { -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 32bf8d6..3a8b437 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -16,6 +16,7 @@ - #include "qemu-common.h" - #include "cpu.h" - #include "qemu/timer.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "kvm_arm.h" - #include "internals.h" -@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 666a81a..d368189 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -23,6 +23,7 @@ - #include "qemu/host-utils.h" - #include "qemu/main-loop.h" - #include "exec/gdbstub.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" - #include "kvm_arm.h" -@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index b48a9c9..01a9a18 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h 
-@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); - bool write_kvmstate_to_list(ARMCPU *cpu); - - /** -+ * kvm_arm_cpu_pre_save: -+ * @cpu: ARMCPU -+ * -+ * Called after write_kvmstate_to_list() from cpu_pre_save() to update -+ * the cpreg list with KVM CPU state. -+ */ -+void kvm_arm_cpu_pre_save(ARMCPU *cpu); -+ -+/** -+ * kvm_arm_cpu_post_load: -+ * @cpu: ARMCPU -+ * -+ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. -+ */ -+void kvm_arm_cpu_post_load(ARMCPU *cpu); -+ -+/** - * kvm_arm_reset_vcpu: - * @cpu: ARMCPU - * -@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -+/** -+ * kvm_arm_get_virtual_time: -+ * @cs: CPUState -+ * -+ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. -+ */ -+void kvm_arm_get_virtual_time(CPUState *cs); -+ -+/** -+ * kvm_arm_put_virtual_time: -+ * @cs: CPUState -+ * -+ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
-+ */ -+void kvm_arm_put_virtual_time(CPUState *cs); -+ -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state); -+ - int kvm_arm_vgic_probe(void); - - void kvm_arm_pmu_set_irq(CPUState *cs, int irq); -@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} - static inline void kvm_arm_pmu_init(CPUState *cs) {} - - static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} -+ -+static inline void kvm_arm_get_virtual_time(CPUState *cs) {} -+static inline void kvm_arm_put_virtual_time(CPUState *cs) {} - #endif - - static inline const char *gic_class_name(void) -diff --git a/target/arm/machine.c b/target/arm/machine.c -index eb28b23..241890a 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) - /* This should never fail */ - abort(); - } -+ -+ /* -+ * kvm_arm_cpu_pre_save() must be called after -+ * write_kvmstate_to_list() -+ */ -+ kvm_arm_cpu_pre_save(cpu); - } else { - if (!write_cpustate_to_list(cpu, false)) { - /* This should never fail. */ -@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) - * we're using it. 
- */ - write_list_to_cpustate(cpu); -+ kvm_arm_cpu_post_load(cpu); - } else { - if (!write_list_to_cpustate(cpu)) { - return -1; --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch deleted file mode 100644 index 8cdc867..0000000 --- a/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:10 +0000 -Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-2-drjones@redhat.com> -Patchwork-id: 93625 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:05 +0000 - - target/arm/kvm: trivial: Clean up header documentation - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-2-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- - 1 file changed, 27 insertions(+), 19 deletions(-) - -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 8e14d40..b48a9c9 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -28,9 +28,9 @@ - int kvm_arm_vcpu_init(CPUState *cs); - - /** -- * kvm_arm_vcpu_finalize -+ * kvm_arm_vcpu_finalize: - * @cs: CPUState -- * @feature: int -+ * @feature: feature to finalize - * - * Finalizes the configuration of the specified VCPU feature by - * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring -@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, - int kvm_arm_init_cpreg_list(ARMCPU *cpu); - - /** -- * kvm_arm_reg_syncs_via_cpreg_list -- * regidx: KVM register index -+ * kvm_arm_reg_syncs_via_cpreg_list: -+ * @regidx: KVM register index - * - * Return true if this KVM register should be synchronized via the - * cpreg list of arbitrary system registers, false if it is synchronized -@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); - bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); - - /** -- * kvm_arm_cpreg_level -- * regidx: KVM register index -+ * kvm_arm_cpreg_level: -+ * @regidx: KVM register index - * - * Return the level of this coprocessor/system register. Return value is - * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. -@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); - * @cpu: ARMCPU - * - * Get VCPU related state from kvm. -+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_get_vcpu_events(ARMCPU *cpu); - -@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); - * @cpu: ARMCPU - * - * Put VCPU related state to kvm. 
-+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_put_vcpu_events(ARMCPU *cpu); - -@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { - - /** - * kvm_arm_get_host_cpu_features: -- * @ahcc: ARMHostCPUClass to fill in -+ * @ahcf: ARMHostCPUClass to fill in - * - * Probe the capabilities of the host kernel's preferred CPU and fill - * in the ARMHostCPUClass struct accordingly. -+ * -+ * Returns true on success and false otherwise. - */ - bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - -@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - bool kvm_arm_aarch32_supported(CPUState *cs); - - /** -- * bool kvm_arm_pmu_supported: -+ * kvm_arm_pmu_supported: - * @cs: CPUState - * - * Returns: true if the KVM VCPU can enable its PMU -@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); - bool kvm_arm_pmu_supported(CPUState *cs); - - /** -- * bool kvm_arm_sve_supported: -+ * kvm_arm_sve_supported: - * @cs: CPUState - * - * Returns true if the KVM VCPU can enable SVE and false otherwise. -@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); - bool kvm_arm_sve_supported(CPUState *cs); - - /** -- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the -- * IPA address space supported by KVM -- * -+ * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle -+ * -+ * Returns the number of bits in the IPA address space supported by KVM - */ - int kvm_arm_get_max_vm_ipa_size(MachineState *ms); - - /** -- * kvm_arm_sync_mpstate_to_kvm -+ * kvm_arm_sync_mpstate_to_kvm: - * @cpu: ARMCPU - * - * If supported set the KVM MP_STATE based on QEMU's model. -+ * -+ * Returns 0 on success and -1 on failure. - */ - int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - - /** -- * kvm_arm_sync_mpstate_to_qemu -+ * kvm_arm_sync_mpstate_to_qemu: - * @cpu: ARMCPU - * - * If supported get the MP_STATE from KVM and store in QEMU's model. -+ * -+ * Returns 0 on success and aborts on failure. 
- */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - - static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - { -- /* This should never actually be called in the "not KVM" case, -+ /* -+ * This should never actually be called in the "not KVM" case, - * but set up the fields to indicate an error anyway. - */ - cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; -@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); - * - * Return: TRUE if any hardware breakpoints in use. - */ -- - bool kvm_arm_hw_debug_active(CPUState *cs); - - /** - * kvm_arm_copy_hw_debug_data: -- * - * @ptr: kvm_guest_debug_arch structure - * - * Copy the architecture specific debug registers into the - * kvm_guest_debug ioctl structure. - */ - struct kvm_guest_debug_arch; -- - void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); - - /** -- * its_class_name -+ * its_class_name: - * - * Return the ITS class name to use depending on whether KVM acceleration - * and KVM CAP_SIGNAL_MSI are supported --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch deleted file mode 100644 index 36c0f1a..0000000 --- a/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:11 +0000 -Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-3-drjones@redhat.com> -Patchwork-id: 93621 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: 
Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm64: kvm64 cpus have timer registers - - Add the missing GENERIC_TIMER feature to kvm64 cpus. - - We don't currently use these registers when KVM is enabled, but it's - probably best we add the feature flag for consistency and potential - future use. There's also precedent, as we add the PMU feature flag to - KVM enabled guests, even though we don't use those registers either. - - This change was originally posted as a hunk of a different, never - merged patch from Bijan Mottahedeh. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-4-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/kvm64.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index e2da756..666a81a 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - set_feature(&features, ARM_FEATURE_NEON); - set_feature(&features, ARM_FEATURE_AARCH64); - set_feature(&features, ARM_FEATURE_PMU); -+ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); - - ahcf->features = features; - --- -1.8.3.1 - diff --git a/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch deleted file mode 100644 index 55f328d..0000000 --- a/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +++ /dev/null @@ -1,81 +0,0 @@ -From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:57 +0000 -Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion crashed - qemu when using machine type none 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-2-drjones@redhat.com> -Patchwork-id: 93773 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Liang Yan -Date: Fri, 07 Feb 2020 14:04:21 +0000 - - target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none - - Commit e19afd566781 mentioned that target-arm only supports queryable - cpu models 'max', 'host', and the current type when KVM is in use. - The logic works well until using machine type none. - - For machine type none, cpu_type will be null if cpu option is not - set by command line, strlen(cpu_type) will terminate process. - So We add a check above it. - - This won't affect i386 and s390x since they do not use current_cpu. - - Signed-off-by: Liang Yan - Message-id: 20200203134251.12986-1-lyan@suse.com - Reviewed-by: Andrew Jones - Tested-by: Andrew Jones - Signed-off-by: Peter Maydell - -(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/monitor.c | 15 +++++++++------ - 1 file changed, 9 insertions(+), 6 deletions(-) - -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index 9725dff..c2dc790 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - } - - if (kvm_enabled()) { -- const char *cpu_type = current_machine->cpu_type; -- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); - bool supported = false; - - if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { - /* These are kvmarm's recommended cpu types */ - supported = true; -- } else if (strlen(model->name) == len && -- !strncmp(model->name, cpu_type, len)) { -- /* KVM is enabled and we're using this type, so it works. */ -- supported = true; -+ } else if (current_machine->cpu_type) { -+ const char *cpu_type = current_machine->cpu_type; -+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); -+ -+ if (strlen(model->name) == len && -+ !strncmp(model->name, cpu_type, len)) { -+ /* KVM is enabled and we're using this type, so it works. */ -+ supported = true; -+ } - } - if (!supported) { - error_setg(errp, "We cannot guarantee the CPU type '%s' works " --- -1.8.3.1 - diff --git a/kvm-target-i386-add-a-ucode-rev-property.patch b/kvm-target-i386-add-a-ucode-rev-property.patch deleted file mode 100644 index 5c3c770..0000000 --- a/kvm-target-i386-add-a-ucode-rev-property.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:12 +0000 -Subject: [PATCH 2/9] target/i386: add a ucode-rev property - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-3-pbonzini@redhat.com> -Patchwork-id: 93909 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property -Bugzilla: 1791648 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. 
David Alan Gilbert - -Add the property and plumb it in TCG and HVF (the latter of which -tried to support returning a constant value but used the wrong MSR). - -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 10 ++++++++++ - target/i386/cpu.h | 3 +++ - target/i386/hvf/x86_emu.c | 4 +--- - target/i386/misc_helper.c | 4 ++++ - 4 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 863192c..e505d3e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - } - } - -+ if (cpu->ucode_rev == 0) { -+ /* The default is the same as KVM's. */ -+ if (IS_AMD_CPU(env)) { -+ cpu->ucode_rev = 0x01000065; -+ } else { -+ cpu->ucode_rev = 0x100000000ULL; -+ } -+ } -+ - /* mwait extended info: needed for Core compatibility */ - /* We always wake on interrupt even if host does not have the capability */ - cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; -@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), - DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), - DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), -+ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), - DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), - DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), - DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index cde2a16..4441061 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -348,6 +348,7 @@ typedef enum X86Seg { - #define MSR_IA32_SPEC_CTRL 0x48 - #define MSR_VIRT_SSBD 0xc001011f - 
#define MSR_IA32_PRED_CMD 0x49 -+#define MSR_IA32_UCODE_REV 0x8b - #define MSR_IA32_CORE_CAPABILITY 0xcf - - #define MSR_IA32_ARCH_CAPABILITIES 0x10a -@@ -1621,6 +1622,8 @@ struct X86CPU { - CPUNegativeOffsetState neg; - CPUX86State env; - -+ uint64_t ucode_rev; -+ - uint32_t hyperv_spinlock_attempts; - char *hyperv_vendor_id; - bool hyperv_synic_kvm_only; -diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c -index 3df7672..92ab815 100644 ---- a/target/i386/hvf/x86_emu.c -+++ b/target/i386/hvf/x86_emu.c -@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) - RIP(env) += decode->len; - } - --#define MSR_IA32_UCODE_REV 0x00000017 -- - void simulate_rdmsr(struct CPUState *cpu) - { - X86CPU *x86_cpu = X86_CPU(cpu); -@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) - val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); - break; - case MSR_IA32_UCODE_REV: -- val = (0x100000000ULL << 32) | 0x100000000ULL; -+ val = x86_cpu->ucode_rev; - break; - case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index 3eff688..aed16fe 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch deleted file mode 100644 index a80c9d3..0000000 --- 
a/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:15 +0000 -Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV - as an emulated MSR -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-6-pbonzini@redhat.com> -Patchwork-id: 93898 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, -which added it to the emulated MSR list, a bug caused the microcode -version to revert to 0x100000000 on INIT. As a result, processors other -than the bootstrap processor would not see the host microcode revision; -some Windows version complain loudly about this and crash with a -fairly explicit MICROCODE REVISION MISMATCH error. - -[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset - microcode version on INIT or RESET" should also be applied.] - -Reported-by: Alex Williamson -Message-id: <20200211175516.10716-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/kvm.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6c61aef..99840ca 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -105,6 +105,7 @@ static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; -+static bool has_msr_ucode_rev; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_VMX_VMFUNC: - has_msr_vmx_vmfunc = true; - break; -+ case MSR_IA32_UCODE_REV: -+ has_msr_ucode_rev = true; -+ break; - } - } - } -@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -- if (kvm_arch_get_supported_msr_feature(kvm_state, -- MSR_IA32_UCODE_REV)) { -+ if (has_msr_ucode_rev) { - kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); - } - --- -1.8.3.1 - diff --git a/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch deleted file mode 100644 index 4c2362d..0000000 --- a/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Tue, 14 Apr 2020 15:00:36 +0100 -Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution - controls - -RH-Author: Vitaly Kuznetsov -Message-id: <20200414150036.625732-2-vkuznets@redhat.com> -Patchwork-id: 94674 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls -Bugzilla: 1822682 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Paolo Bonzini - -Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for -secondary execution controls") added a workaround for KVM pre-dating 
-commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm -KVM_GET_MSRS") which wasn't setting certain available controls. The -workaround uses generic CPUID feature bits to set missing VMX controls. - -It was found that in some cases it is possible to observe hosts which -have certain CPUID features but lack the corresponding VMX control. - -In particular, it was reported that Azure VMs have RDSEED but lack -VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature -bit result in QEMU abort. - -Resolve the issue but not applying the workaround when we don't have -to. As there is no good way to find out if KVM has the fix itself, use -95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead -as these [are supposed to] come together. - -Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") -Suggested-by: Paolo Bonzini -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- - 1 file changed, 26 insertions(+), 15 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 99840ca..fcc8f7d 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; - static bool has_msr_ucode_rev; -+static bool has_msr_vmx_procbased_ctls2; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - value = msr_data.entries[0].data; - switch (index) { - case MSR_IA32_VMX_PROCBASED_CTLS2: -- /* KVM forgot to add these bits for some time, do this ourselves. 
*/ -- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ if (!has_msr_vmx_procbased_ctls2) { -+ /* KVM forgot to add these bits for some time, do this ourselves. */ -+ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & -+ CPUID_XSAVE_XSAVES) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & -+ CPUID_EXT_RDRAND) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_INVPCID) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_RDSEED) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & -+ CPUID_EXT2_RDTSCP) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ } - } - /* fall through */ - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_UCODE_REV: - has_msr_ucode_rev = true; - break; -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ has_msr_vmx_procbased_ctls2 = true; -+ break; - } - } - } --- -1.8.3.1 - diff --git a/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch 
b/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch deleted file mode 100644 index 47438a3..0000000 --- a/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:16 +0000 -Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu - max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-7-pbonzini@redhat.com> -Patchwork-id: 93910 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -These two features were incorrectly tied to host_cpuid_required rather than -cpu->max_features. As a result, -cpu max was not enabling either MONITOR -features or ucode revision. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) - -[RHEL7: context, upstream uses g_autofree] - -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5ac843d..1685a8c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - g_free(name); - goto out; - } -+ } - -+ if (cpu->max_features && accel_uses_host_cpuid()) { - if (enable_cpu_pm) { - host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, - &cpu->mwait.ecx, &cpu->mwait.edx); --- -1.8.3.1 - diff --git a/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/kvm-target-i386-fix-TCG-UCODE_REV-access.patch deleted file mode 100644 index c7ced8a..0000000 --- a/kvm-target-i386-fix-TCG-UCODE_REV-access.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:14 +0000 -Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-5-pbonzini@redhat.com> -Patchwork-id: 93904 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -This was a very interesting semantic conflict that caused git to move -the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but -still should be fixed... - -Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) -Message-id: <20200206171022.9289-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/misc_helper.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index aed16fe..7d61221 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -- X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -- case MSR_IA32_UCODE_REV: -- val = x86_cpu->ucode_rev; -- break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + -@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) - - void helper_rdmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); -@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) - case MSR_IA32_BNDCFGS: - val = env->msr_bndcfgs; - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch deleted file mode 100644 index 5118aed..0000000 --- a/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +++ /dev/null @@ -1,178 +0,0 @@ -From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:11 +0000 -Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-2-pbonzini@redhat.com> 
-Patchwork-id: 93899 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Some read-only MSRs affect the behavior of ioctls such as -KVM_SET_NESTED_STATE. We can initialize them once and for all -right after the CPU is realized, since they will never be modified -by the guest. - -Reported-by: Qingua Cheng -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- - target/i386/kvm_i386.h | 1 + - 2 files changed, 49 insertions(+), 33 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 86d9a1f..f41605b 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -67,6 +67,8 @@ - * 255 kvm_msr_entry structs */ - #define MSR_BUF_SIZE 4096 - -+static void kvm_init_msrs(X86CPU *cpu); -+ - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - KVM_CAP_INFO(SET_TSS_ADDR), - KVM_CAP_INFO(EXT_CPUID), -@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - has_msr_tsc_aux = false; - } - -+ kvm_init_msrs(cpu); -+ - r = hyperv_init_vcpu(cpu); - if (r) { - goto fail; -@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) - VMCS12_MAX_FIELD_INDEX << 1); - } - -+static int kvm_buf_set_msrs(X86CPU *cpu) -+{ -+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (ret < cpu->kvm_msr_buf->nmsrs) { -+ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -+ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -+ (uint32_t)e->index, (uint64_t)e->data); -+ } -+ -+ assert(ret == 
cpu->kvm_msr_buf->nmsrs); -+ return 0; -+} -+ -+static void kvm_init_msrs(X86CPU *cpu) -+{ -+ CPUX86State *env = &cpu->env; -+ -+ kvm_msr_buf_reset(cpu); -+ if (has_msr_arch_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -+ env->features[FEAT_ARCH_CAPABILITIES]); -+ } -+ -+ if (has_msr_core_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -+ env->features[FEAT_CORE_CAPABILITY]); -+ } -+ -+ /* -+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -+ * all kernels with MSR features should have them. -+ */ -+ if (kvm_feature_msrs && cpu_has_vmx(env)) { -+ kvm_msr_entry_add_vmx(cpu, env->features); -+ } -+ -+ assert(kvm_buf_set_msrs(cpu) == 0); -+} -+ - static int kvm_put_msrs(X86CPU *cpu, int level) - { - CPUX86State *env = &cpu->env; - int i; -- int ret; - - kvm_msr_buf_reset(cpu); - -@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - #endif - -- /* If host supports feature MSR, write down. */ -- if (has_msr_arch_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -- env->features[FEAT_ARCH_CAPABILITIES]); -- } -- -- if (has_msr_core_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -- env->features[FEAT_CORE_CAPABILITY]); -- } -- - /* - * The following MSRs have side effects on the guest or are too heavy - * for normal writeback. Limit them to reset or full state updates. -@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. */ -- -- /* -- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -- * all kernels with MSR features should have them. 
-- */ -- if (kvm_feature_msrs && cpu_has_vmx(env)) { -- kvm_msr_entry_add_vmx(cpu, env->features); -- } - } - - if (env->mcg_cap) { -@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - } - -- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -- if (ret < 0) { -- return ret; -- } -- -- if (ret < cpu->kvm_msr_buf->nmsrs) { -- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -- (uint32_t)e->index, (uint64_t)e->data); -- } -- -- assert(ret == cpu->kvm_msr_buf->nmsrs); -- return 0; -+ return kvm_buf_set_msrs(cpu); - } - - -diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h -index 06fe06b..d98c6f6 100644 ---- a/target/i386/kvm_i386.h -+++ b/target/i386/kvm_i386.h -@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); - bool kvm_has_x2apic_api(void); - - bool kvm_hv_vpindex_settable(void); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch deleted file mode 100644 index 99b18fc..0000000 --- a/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:13 +0000 -Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-4-pbonzini@redhat.com> -Patchwork-id: 93897 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -KVM can return the host microcode revision as a feature MSR. -Use it as the default value for -cpu host. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 4 ++++ - target/i386/kvm.c | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e505d3e..5ac843d 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - &cpu->mwait.ecx, &cpu->mwait.edx); - env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; - } -+ if (kvm_enabled() && cpu->ucode_rev == 0) { -+ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV); -+ } - } - - if (cpu->ucode_rev == 0) { -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index f41605b..6c61aef 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -+ if (kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV)) { -+ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); -+ } -+ - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. 
--- -1.8.3.1 - diff --git a/kvm-tcp_emu-Fix-oob-access.patch b/kvm-tcp_emu-Fix-oob-access.patch deleted file mode 100644 index e532877..0000000 --- a/kvm-tcp_emu-Fix-oob-access.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:56 +0100 -Subject: [PATCH 1/4] tcp_emu: Fix oob access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> -Patchwork-id: 93399 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Samuel Thibault - -The main loop only checks for one available byte, while we sometimes -need two bytes. - -[ MA - minor conflict, CHANGELOG.md absent ] -(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index d6dd133..cbecd64 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - break; - - case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - /* - * The difference between versions 1.0 and - * 2.0 is here. For future versions of -@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - /* This is the field containing the port - * number that RA-player is listening to. 
- */ -+ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; - if (lport < 6970) - lport += 256; /* don't know why */ --- -1.8.3.1 - diff --git a/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch deleted file mode 100644 index 846da73..0000000 --- a/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:49 +0000 -Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-3-jmaloy@redhat.com> -Patchwork-id: 93826 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() assume that snprintf() returns "only" the -number of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Before patch ce131029, if there isn't enough room in "m_data" for the -"DCC ..." message, we overflow "m_data". - -After the patch, if there isn't enough room for the same, we don't -overflow "m_data", but we set "m_len" out-of-bounds. The next time an -access is bounded by "m_len", we'll have a buffer overflow then. - -Use slirp_fmt*() to fix potential OOB memory access. 
- -Reported-by: Laszlo Ersek -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- - 1 file changed, 21 insertions(+), 23 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 954d1a6..26d4ead 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - NTOHS(n1); - NTOHS(n2); - m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -- assert(m->m_len < M_ROOM(m)); -+ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); - } else { - *eol = '\r'; - } -@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, -- n5, n6, x == 7 ? buff : ""); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "ORT %d,%d,%d,%d,%d,%d\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); - return 1; - } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { - /* -@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); -- -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - return 1; - } - -@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = snprintf(m->m_data, M_ROOM(m), -- "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)); - return 1; - - case EMU_IRC: -@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC CHAT chat %lu %u%c\n", -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC SEND %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC MOVE %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC MOVE 
%s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } - return 1; - --- -1.8.3.1 - diff --git a/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/kvm-tests-arm-cpu-features-Check-feature-default-values.patch deleted file mode 100644 index e8a48bf..0000000 --- a/kvm-tests-arm-cpu-features-Check-feature-default-values.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:12 +0000 -Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-4-drjones@redhat.com> -Patchwork-id: 93626 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - tests/arm-cpu-features: Check feature default values - - If we know what the default value should be then we can test for - that as well as the feature existence. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-5-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 6e99aa9..89285ca 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) - qobject_unref(_resp); \ - }) - -+#define assert_feature(qts, cpu_type, feature, expected_value) \ -+({ \ -+ QDict *_resp, *_props; \ -+ \ -+ _resp = do_query_no_props(qts, cpu_type); \ -+ g_assert(_resp); \ -+ g_assert(resp_has_props(_resp)); \ -+ _props = resp_get_props(_resp); \ -+ g_assert(qdict_get(_props, feature)); \ -+ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ -+ qobject_unref(_resp); \ -+}) -+ -+#define assert_has_feature_enabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, true) -+ -+#define assert_has_feature_disabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, false) -+ - static void assert_type_full(QTestState *qts) - { - const char *error; -@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -- assert_has_feature(qts, "max", "pmu"); -- assert_has_feature(qts, "cortex-a15", "pmu"); -+ assert_has_feature_enabled(qts, "max", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - - if (g_str_equal(qtest_get_arch(), "aarch64")) { -- assert_has_feature(qts, "max", "aarch64"); -- assert_has_feature(qts, "max", "sve"); -- assert_has_feature(qts, "max", "sve128"); -- assert_has_feature(qts, "cortex-a57", "pmu"); -- assert_has_feature(qts, "cortex-a57", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "sve"); -+ 
assert_has_feature_enabled(qts, "max", "sve128"); -+ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - - sve_tests_default(qts, "max"); - -@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -- assert_has_feature(qts, "host", "aarch64"); -- assert_has_feature(qts, "host", "pmu"); -+ assert_has_feature_enabled(qts, "host", "aarch64"); -+ assert_has_feature_enabled(qts, "host", "pmu"); - - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " --- -1.8.3.1 - diff --git a/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch deleted file mode 100644 index 3efef47..0000000 --- a/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:12 +0000 -Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix - fuse_out_header::error value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-6-dgilbert@redhat.com> -Patchwork-id: 94128 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Philippe Mathieu-Daudé - -Fix warning reported by Clang static code analyzer: - - CC tools/virtiofsd/fuse_lowlevel.o - tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read - error = -ERANGE; - ^ ~~~~~~~ - -Fixes: 3db2876 -Reported-by: Clang Static Analyzer -Reviewed-by: Ján Tomko -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 704c036..2dd36ec 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -+ out.error = -ERANGE; - } - - iov[0].iov_base = &out; --- -1.8.3.1 - diff --git a/kvm-tpm-ppi-page-align-PPI-RAM.patch b/kvm-tpm-ppi-page-align-PPI-RAM.patch deleted file mode 100644 index 32c971d..0000000 --- a/kvm-tpm-ppi-page-align-PPI-RAM.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 30 Jan 2020 16:01:10 +0000 -Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> -Patchwork-id: 93600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM -Bugzilla: 1787444 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -post-copy migration fails on destination with error such as: -2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: -Unaligned start address: 0x559d2afae9a0 - -Use qemu_memalign() to constrain the PPI RAM memory alignment. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dr. 
David Alan Gilbert -Reviewed-by: Stefan Berger -Signed-off-by: Stefan Berger -Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 - -(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - hw/tpm/tpm_ppi.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c -index ff31459..6d9c1a3 100644 ---- a/hw/tpm/tpm_ppi.c -+++ b/hw/tpm/tpm_ppi.c -@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) - void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, - hwaddr addr, Object *obj) - { -- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); -+ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, -+ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); - memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", - TPM_PPI_ADDR_SIZE, tpmppi->buf); - vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); --- -1.8.3.1 - diff --git a/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/kvm-trace-update-qemu-trace-stap-to-Python-3.patch deleted file mode 100644 index c49aecd..0000000 --- a/kvm-trace-update-qemu-trace-stap-to-Python-3.patch +++ /dev/null @@ -1,82 +0,0 @@ -From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 16 Jan 2020 17:52:48 +0000 -Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20200116175248.286556-2-stefanha@redhat.com> -Patchwork-id: 93365 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 -Bugzilla: 1787395 -RH-Acked-by: John Snow -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Dr. 
David Alan Gilbert - -qemu-trace-stap does not support Python 3 yet: - - $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 - Traceback (most recent call last): - File "scripts/qemu-trace-stap", line 175, in - main() - File "scripts/qemu-trace-stap", line 171, in main - args.func(args) - File "scripts/qemu-trace-stap", line 118, in cmd_list - print_probes(args.verbose, "*") - File "scripts/qemu-trace-stap", line 114, in print_probes - if line.startswith(prefix): - TypeError: startswith first arg must be bytes or a tuple of bytes, not str - -Now that QEMU requires Python 3.5 or later we can switch to pure Python -3. Use Popen()'s universal_newlines=True argument to treat stdout as -text instead of binary. - -Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Message-id: 20200107112438.383958-1-stefanha@redhat.com -Message-Id: <20200107112438.383958-1-stefanha@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - scripts/qemu-trace-stap | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap -index 91d1051..90527eb 100755 ---- a/scripts/qemu-trace-stap -+++ b/scripts/qemu-trace-stap -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/env python3 - # -*- python -*- - # - # Copyright (C) 2019 Red Hat, Inc -@@ -18,8 +18,6 @@ - # You should have received a copy of the GNU General Public License - # along with this program; if not, see . 
- --from __future__ import print_function -- - import argparse - import copy - import os.path -@@ -104,7 +102,9 @@ def cmd_list(args): - if verbose: - print("Listing probes with name '%s'" % script) - proc = subprocess.Popen(["stap", "-l", script], -- stdout=subprocess.PIPE, env=tapset_env(tapsets)) -+ stdout=subprocess.PIPE, -+ universal_newlines=True, -+ env=tapset_env(tapsets)) - out, err = proc.communicate() - if proc.returncode != 0: - print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) --- -1.8.3.1 - diff --git a/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch deleted file mode 100644 index 8f08256..0000000 --- a/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:30 +0000 -Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-2-dgilbert@redhat.com> -Patchwork-id: 93344 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: "Dr. David Alan Gilbert" - -I've got a case where usbredir_write manages to call back into itself -via spice; this patch causes the recursion to fail (0 bytes) the write; -this seems to avoid the deadlock I was previously seeing. 
- -I can't say I fully understand the interaction of usbredir and spice; -but there are a few similar guards in spice and usbredir -to catch other cases especially onces also related to spice_server_char_device_wakeup - -This case seems to be triggered by repeated migration+repeated -reconnection of the viewer; but my debugging suggests the migration -finished before this hits. - -The backtrace of the hang looks like: - reds_handle_ticket - reds_handle_other_links - reds_channel_do_link - red_channel_connect - spicevmc_connect - usbredir_create_parser - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - spice_chr_write - spice_server_char_device_wakeup - red_char_device_wakeup - red_char_device_write_to_device - vmc_write - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - qemu_mutex_lock_impl - -and we fail as we land through qemu_chr_write_buffer's lock -twice. - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/redirect.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index e0f5ca6..97f2c3a 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -113,6 +113,7 @@ struct USBRedirDevice { - /* Properties */ - CharBackend cs; - bool enable_streams; -+ bool in_write; - uint8_t debug; - int32_t bootindex; - char *filter_str; -@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - return 0; - } - -+ /* Recursion check */ -+ if (dev->in_write) { -+ DPRINTF("usbredir_write recursion\n"); -+ return 0; -+ } -+ dev->in_write = true; -+ - r = qemu_chr_fe_write(&dev->cs, data, count); - if (r < count) { - if (!dev->watch) { -@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - r = 0; - } - } -+ dev->in_write = false; - return r; - } - --- -1.8.3.1 - diff --git a/kvm-util-add-slirp_fmt-helpers.patch b/kvm-util-add-slirp_fmt-helpers.patch deleted file mode 100644 index 31af599..0000000 --- a/kvm-util-add-slirp_fmt-helpers.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:48 +0000 -Subject: [PATCH 2/7] util: add slirp_fmt() helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-2-jmaloy@redhat.com> -Patchwork-id: 93824 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() in libslirp assume that snprintf() returns -"only" the number of bytes written (excluding terminating NUL). 
- -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Introduce slirp_fmt() that handles several pathological cases the -way libslirp usually expect: - -- treat error as fatal (instead of silently returning -1) - -- fmt0() will always \0 end - -- return the number of bytes actually written (instead of what would -have been written, which would usually result in OOB later), including -the ending \0 for fmt0() - -- warn if truncation happened (instead of ignoring) - -Other less common cases can still be handled with strcpy/snprintf() etc. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - slirp/src/util.h | 3 +++ - 2 files changed, 65 insertions(+) - -diff --git a/slirp/src/util.c b/slirp/src/util.c -index e596087..e3b6257 100644 ---- a/slirp/src/util.c -+++ b/slirp/src/util.c -@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) - } - *q = '\0'; - } -+ -+static int slirp_vsnprintf(char *str, size_t size, -+ const char *format, va_list args) -+{ -+ int rv = vsnprintf(str, size, format, args); -+ -+ if (rv < 0) { -+ g_error("vsnprintf() failed: %s", g_strerror(errno)); -+ } -+ -+ return rv; -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - returns the number of bytes written (excluding optional \0-ending) -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt(char *str, size_t size, const char *format, ...) 
-+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv > size) { -+ g_critical("vsnprintf() truncation"); -+ } -+ -+ return MIN(rv, size); -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - always \0-end (unless size == 0) -+ * - returns the number of bytes actually written, including \0 ending -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt0(char *str, size_t size, const char *format, ...) -+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("vsnprintf() truncation"); -+ if (size > 0) -+ str[size - 1] = '\0'; -+ rv = size; -+ } else { -+ rv += 1; /* include \0 */ -+ } -+ -+ return rv; -+} -diff --git a/slirp/src/util.h b/slirp/src/util.h -index 3c6223c..0558dfc 100644 ---- a/slirp/src/util.h -+++ b/slirp/src/util.h -@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) - - void slirp_pstrcpy(char *buf, int buf_size, const char *str); - -+int slirp_fmt(char *str, size_t size, const char *format, ...); -+int slirp_fmt0(char *str, size_t size, const char *format, ...); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch deleted file mode 100644 index d416e0f..0000000 --- a/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 8 Jan 2020 15:04:57 +0000 -Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered - -RH-Author: Peter Xu -Message-id: <20200108150457.12324-2-peterx@redhat.com> -Patchwork-id: 93291 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered -Bugzilla: 1782678 -RH-Acked-by: Alex 
Williamson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Auger Eric -RH-Acked-by: Jens Freimann - -The kvm irqchip notifier is only registered if the device supports -INTx, however it's unconditionally removed. If the assigned device -does not support INTx, this will cause QEMU to crash when unplugging -the device from the system. Change it to conditionally remove the -notifier only if the notify hook is setup. - -CC: Eduardo Habkost -CC: David Gibson -CC: Alex Williamson -Cc: qemu-stable@nongnu.org # v4.2 -Reported-by: yanghliu@redhat.com -Debugged-by: Eduardo Habkost -Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 -Signed-off-by: Peter Xu -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Alex Williamson -(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 309535f..d717520 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) - vfio_unregister_req_notifier(vdev); - vfio_unregister_err_notifier(vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { - timer_free(vdev->intx.mmap_timer); --- -1.8.3.1 - diff --git a/kvm-vhost-Add-names-to-section-rounded-warning.patch b/kvm-vhost-Add-names-to-section-rounded-warning.patch deleted file mode 100644 index c41a14c..0000000 --- a/kvm-vhost-Add-names-to-section-rounded-warning.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:11 +0100 -Subject: [PATCH 002/116] vhost: Add names to section rounded warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-2-dgilbert@redhat.com> -Patchwork-id: 93450 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -Add the memory region names to section rounding/alignment -warnings. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 4da0d5a..774d87d 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, - * match up in the same RAMBlock if they do. 
- */ - if (mrs_gpa < prev_gpa_start) { -- error_report("%s:Section rounded to %"PRIx64 -- " prior to previous %"PRIx64, -- __func__, mrs_gpa, prev_gpa_start); -+ error_report("%s:Section '%s' rounded to %"PRIx64 -+ " prior to previous '%s' %"PRIx64, -+ __func__, section->mr->name, mrs_gpa, -+ prev_sec->mr->name, prev_gpa_start); - /* A way to cleanly fail here would be better */ - return; - } --- -1.8.3.1 - diff --git a/kvm-vhost-Only-align-sections-for-vhost-user.patch b/kvm-vhost-Only-align-sections-for-vhost-user.patch deleted file mode 100644 index e082ce8..0000000 --- a/kvm-vhost-Only-align-sections-for-vhost-user.patch +++ /dev/null @@ -1,97 +0,0 @@ -From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:12 +0100 -Subject: [PATCH 003/116] vhost: Only align sections for vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-3-dgilbert@redhat.com> -Patchwork-id: 93452 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -I added hugepage alignment code in c1ece84e7c9 to deal with -vhost-user + postcopy which needs aligned pages when using userfault. -However, on x86 the lower 2MB of address space tends to be shotgun'd -with small fragments around the 512-640k range - e.g. video RAM, and -with HyperV synic pages tend to sit around there - again splitting -it up. The alignment code complains with a 'Section rounded to ...' -error and gives up. - -Since vhost-user already filters out devices without an fd -(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be -affected by those overlaps. 
- -Turn the alignment off on vhost-kernel so that it doesn't try -and align, and thus won't hit the rounding issues. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Paolo Bonzini -(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- - 1 file changed, 18 insertions(+), 16 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 774d87d..25fd469 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, - uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + - section->offset_within_region; - RAMBlock *mrs_rb = section->mr->ram_block; -- size_t mrs_page = qemu_ram_pagesize(mrs_rb); - - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- /* Round the section to it's page size */ -- /* First align the start down to a page boundary */ -- uint64_t alignage = mrs_host & (mrs_page - 1); -- if (alignage) { -- mrs_host -= alignage; -- mrs_size += alignage; -- mrs_gpa -= alignage; -- } -- /* Now align the size up to a page boundary */ -- alignage = mrs_size & (mrs_page - 1); -- if (alignage) { -- mrs_size += mrs_page - alignage; -- } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ /* Round the section to it's page size */ -+ /* First align the start down to a page boundary */ -+ size_t mrs_page = qemu_ram_pagesize(mrs_rb); -+ uint64_t alignage = mrs_host & (mrs_page - 1); -+ if (alignage) { -+ mrs_host -= alignage; -+ mrs_size += alignage; -+ mrs_gpa -= alignage; -+ } -+ /* Now align the size up to a page boundary */ -+ alignage = mrs_size & (mrs_page - 1); -+ if 
(alignage) { -+ mrs_size += mrs_page - alignage; -+ } -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -+ mrs_host); -+ } - - if (dev->n_tmp_sections) { - /* Since we already have at least one section, lets see if --- -1.8.3.1 - diff --git a/kvm-vhost-coding-style-fix.patch b/kvm-vhost-coding-style-fix.patch deleted file mode 100644 index 4546130..0000000 --- a/kvm-vhost-coding-style-fix.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:13 +0100 -Subject: [PATCH 004/116] vhost: coding style fix -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-4-dgilbert@redhat.com> -Patchwork-id: 93453 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Michael S. Tsirkin" - -Drop a trailing whitespace. Make line shorter. - -Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 25fd469..9edfadc 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { - /* Round the section to it's page size */ - /* First align the start down to a page boundary */ - size_t mrs_page = qemu_ram_pagesize(mrs_rb); -@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, - if (alignage) { - mrs_size += mrs_page - alignage; - } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, -+ mrs_size, mrs_host); - } - - if (dev->n_tmp_sections) { --- -1.8.3.1 - diff --git a/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/kvm-vhost-user-Print-unexpected-slave-message-types.patch deleted file mode 100644 index e5776e7..0000000 --- a/kvm-vhost-user-Print-unexpected-slave-message-types.patch +++ /dev/null @@ -1,48 +0,0 @@ -From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:43 +0100 -Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-69-dgilbert@redhat.com> -Patchwork-id: 93519 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When we receive an unexpected message type on the slave fd, print -the type. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 02a9b25..e4f46ec 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) - fd[0]); - break; - default: -- error_report("Received unexpected msg type."); -+ error_report("Received unexpected msg type: %d.", hdr.request); - ret = -EINVAL; - } - --- -1.8.3.1 - diff --git a/kvm-vhost-user-fs-remove-vhostfd-property.patch b/kvm-vhost-user-fs-remove-vhostfd-property.patch deleted file mode 100644 index 5904e82..0000000 --- a/kvm-vhost-user-fs-remove-vhostfd-property.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:37 +0100 -Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-3-dgilbert@redhat.com> -Patchwork-id: 93458 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Marc-André Lureau - -The property doesn't make much sense for a vhost-user device. - -Signed-off-by: Marc-André Lureau -Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs.c | 1 - - include/hw/virtio/vhost-user-fs.h | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index f0df7f4..ca0b7fc 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -263,7 +263,6 @@ static Property vuf_properties[] = { - DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, - conf.num_request_queues, 1), - DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), -- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 539885b..9ff1bdb 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -28,7 +28,6 @@ typedef struct { - char *tag; - uint16_t num_request_queues; - uint16_t queue_size; -- char *vhostfd; - } VHostUserFSConf; - - typedef struct { --- -1.8.3.1 - diff --git a/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch deleted file mode 100644 index 3a50632..0000000 --- a/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?J=C3=A1n=20Tomko?= -Date: Fri, 21 Feb 2020 09:49:23 +0000 -Subject: [PATCH] vhost-user-gpu: Drop trailing json comma -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Ján Tomko -Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> -Patchwork-id: 94019 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma -Bugzilla: 1805334 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Cole Robinson - -Trailing comma is not valid json: - -$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq -parse error: Expected another key-value pair at line 5, column 1 - -Signed-off-by: Cole Robinson -Reviewed-by: Marc-André Lureau -Reviewed-by: Li Qiang -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) -Signed-off-by: Ján Tomko -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -index 658b545..f5edd09 100644 ---- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in -+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -@@ -1,5 +1,5 @@ - { - "description": "QEMU vhost-user-gpu", - "type": "gpu", -- "binary": "@libexecdir@/vhost-user-gpu", -+ "binary": "@libexecdir@/vhost-user-gpu" - } --- -1.8.3.1 - diff --git a/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch deleted file mode 100644 index ed10701..0000000 --- a/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +++ /dev/null @@ -1,80 +0,0 @@ -From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:28 +0000 -Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-2-jusual@redhat.com> -Patchwork-id: 93980 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add ability to delete vq through a pointer -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Devices tend to maintain vq pointers, allow deleting them trough a vq pointer. - -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Hildenbrand -Reviewed-by: David Hildenbrand -(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 15 ++++++++++----- - include/hw/virtio/virtio.h | 2 ++ - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3211135..d63a369 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - return &vdev->vq[i]; - } - -+void virtio_delete_queue(VirtQueue *vq) -+{ -+ vq->vring.num = 0; -+ vq->vring.num_default = 0; -+ vq->handle_output = NULL; -+ vq->handle_aio_output = NULL; -+ g_free(vq->used_elems); -+} -+ - void virtio_del_queue(VirtIODevice *vdev, int n) - { - if (n < 0 || n >= VIRTIO_QUEUE_MAX) { - abort(); - } - -- vdev->vq[n].vring.num = 0; -- vdev->vq[n].vring.num_default = 0; -- vdev->vq[n].handle_output = NULL; -- vdev->vq[n].handle_aio_output = NULL; -- g_free(vdev->vq[n].used_elems); -+ virtio_delete_queue(&vdev->vq[n]); - } - - static void virtio_set_isr(VirtIODevice *vdev, int value) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 6a20442..91167f6 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - - void virtio_del_queue(VirtIODevice *vdev, int n); - -+void virtio_delete_queue(VirtQueue *vq); -+ - void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len); - void virtqueue_flush(VirtQueue *vq, unsigned int count); --- -1.8.3.1 - diff --git a/kvm-virtio-don-t-enable-notifications-during-polling.patch b/kvm-virtio-don-t-enable-notifications-during-polling.patch deleted file mode 100644 index 2dffc01..0000000 --- a/kvm-virtio-don-t-enable-notifications-during-polling.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jan 2020 11:13:25 +0000 -Subject: [PATCH 3/5] virtio: don't enable notifications during polling - 
-RH-Author: Stefan Hajnoczi -Message-id: <20200109111325.559557-2-stefanha@redhat.com> -Patchwork-id: 93298 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications during polling -Bugzilla: 1789301 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Michael S. Tsirkin - -Virtqueue notifications are not necessary during polling, so we disable -them. This allows the guest driver to avoid MMIO vmexits. -Unfortunately the virtio-blk and virtio-scsi handler functions re-enable -notifications, defeating this optimization. - -Fix virtio-blk and virtio-scsi emulation so they leave notifications -disabled. The key thing to remember for correctness is that polling -always checks one last time after ending its loop, therefore it's safe -to lose the race when re-enabling notifications at the end of polling. - -There is a measurable performance improvement of 5-10% with the null-co -block driver. Real-life storage configurations will see a smaller -improvement because the MMIO vmexit overhead contributes less to -latency. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209210957.65087-1-stefanha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/virtio-blk.c | 9 +++++++-- - hw/scsi/virtio-scsi.c | 9 +++++++-- - hw/virtio/virtio.c | 12 ++++++------ - include/hw/virtio/virtio.h | 1 + - 4 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 4c357d2..c4e55fb 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - { - VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - aio_context_acquire(blk_get_aio_context(s->blk)); - blk_io_plug(s->blk); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_blk_get_request(s, vq))) { - progress = true; -@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } while (!virtio_queue_empty(vq)); - - if (mrb.num_reqs) { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 54108c0..e2cd1df 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_scsi_pop_req(s, vq))) { - progress = true; -@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); - - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 04716b5..3211135 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) - } - } - -+bool virtio_queue_get_notification(VirtQueue *vq) -+{ -+ return vq->notification; -+} -+ - void virtio_queue_set_notification(VirtQueue *vq, int enable) - { - vq->notification = enable; -@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) - { - EventNotifier *n = opaque; - VirtQueue *vq = container_of(n, VirtQueue, host_notifier); -- bool progress; - - if (!vq->vring.desc || virtio_queue_empty(vq)) { - return false; - } - -- progress = virtio_queue_notify_aio_vq(vq); -- -- /* In case the handler function re-enabled notifications */ -- virtio_queue_set_notification(vq, 0); -- return progress; -+ return virtio_queue_notify_aio_vq(vq); - } - - static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index c32a815..6a20442 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); - - void virtio_notify_config(VirtIODevice *vdev); - -+bool virtio_queue_get_notification(VirtQueue *vq); - void virtio_queue_set_notification(VirtQueue *vq, int enable); - - int virtio_queue_ready(VirtQueue *vq); --- -1.8.3.1 - diff --git a/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch deleted file mode 100644 index 9a69ed1..0000000 --- a/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:36 +0100 -Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-2-dgilbert@redhat.com> -Patchwork-id: 93455 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The following MSI-X vectors are required: - * VIRTIO Configuration Change - * hiprio virtqueue - * requests virtqueues - -Fix the calculation to reserve enough MSI-X vectors. Otherwise guest -drivers fall back to a sub-optional configuration where all virtqueues -share a single vector. - -This change does not break live migration compatibility since -vhost-user-fs-pci devices are not migratable yet. - -Reported-by: Vivek Goyal -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209110759.35227-1-stefanha@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs-pci.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c -index 933a3f2..e3a649d 100644 ---- a/hw/virtio/vhost-user-fs-pci.c -+++ b/hw/virtio/vhost-user-fs-pci.c -@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - DeviceState *vdev = DEVICE(&dev->vdev); - - if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { -- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; -+ /* Also reserve config change and hiprio queue vectors */ -+ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; - } - - qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); --- -1.8.3.1 - diff --git a/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/kvm-virtio-make-virtio_delete_queue-idempotent.patch deleted file mode 100644 index 16eb1da..0000000 --- a/kvm-virtio-make-virtio_delete_queue-idempotent.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:29 +0000 -Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-3-jusual@redhat.com> -Patchwork-id: 93981 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Let's make sure calling this twice is harmless - -no known instances, but seems safer. - -Suggested-by: Pan Nengyuan -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index d63a369..e6a9ba4 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_output = NULL; - vq->handle_aio_output = NULL; - g_free(vq->used_elems); -+ vq->used_elems = NULL; - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch deleted file mode 100644 index c21c699..0000000 --- a/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:31 +0000 -Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-5-jusual@redhat.com> -Patchwork-id: 93983 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -If the control queue is not deleted together with TX/RX, it -later will be ignored in freeing cache resources and hot -unplug will not be completed. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index db3d7c3..f325440 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) - for (i = 0; i < max_queues; i++) { - virtio_net_del_queue(n, i); - } -- -+ /* delete also control vq */ -+ virtio_del_queue(vdev, max_queues * 2); - qemu_announce_timer_del(&n->announce_timer, false); - g_free(n->vqs); - qemu_del_nic(n->nic); --- -1.8.3.1 - diff --git a/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch deleted file mode 100644 index c9f1086..0000000 --- a/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:30 +0000 -Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-4-jusual@redhat.com> -Patchwork-id: 93982 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -Fix leak of region reference that prevents complete -device deletion on hot unplug. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index e6a9ba4..f644d9a 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_aio_output = NULL; - g_free(vq->used_elems); - vq->used_elems = NULL; -+ virtio_virtqueue_reset_region_cache(vq); - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/kvm-virtiofs-Add-maintainers-entry.patch b/kvm-virtiofs-Add-maintainers-entry.patch deleted file mode 100644 index fec9371..0000000 --- a/kvm-virtiofs-Add-maintainers-entry.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:11 +0100 -Subject: [PATCH 040/116] virtiofs: Add maintainers entry -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-37-dgilbert@redhat.com> -Patchwork-id: 93491 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) -Signed-off-by: Miroslav Rezanina ---- - MAINTAINERS | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 5e5e3e5..d1b3e26 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next - T: git https://github.com/borntraeger/qemu.git s390-next - L: qemu-s390x@nongnu.org - -+virtiofs -+M: Dr. 
David Alan Gilbert -+M: Stefan Hajnoczi -+S: Supported -+F: tools/virtiofsd/* -+F: hw/virtio/vhost-user-fs* -+F: include/hw/virtio/vhost-user-fs.h -+ - virtio-input - M: Gerd Hoffmann - S: Maintained --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch deleted file mode 100644 index a2b91be..0000000 --- a/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:38 +0100 -Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG - level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-64-dgilbert@redhat.com> -Patchwork-id: 93514 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can identify the specific thread -from the log. - -Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level -so that we can grep the specific thread. - -The log is like as: - - ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... 
- [ID: 00000097] unique: 12696, success, outsize: 120 - [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 - [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 - [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 - [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event - [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 - [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 - [ID: 00000071] lo_read(ino=2, size=65536, off=131072) - -Signed-off-by: Masayoshi Mizuma - -Signed-off-by: Dr. David Alan Gilbert - added rework as suggested by Daniel P. Berrangé during review -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ff6910f..f08324f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ g_autofree char *localfmt = NULL; -+ - if (current_log_level < level) { - return; - } - -+ if (current_log_level == FUSE_LOG_DEBUG) { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ fmt = localfmt; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch b/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch deleted file mode 100644 index b017bf4..0000000 --- a/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +++ 
/dev/null @@ -1,106 +0,0 @@ -From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:05 +0100 -Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-31-dgilbert@redhat.com> -Patchwork-id: 93482 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Wire up the building of the virtiofsd in tools. - -virtiofsd relies on Linux-specific system calls and seccomp. Anyone -wishing to port it to other host operating systems should do so -carefully and without reducing security. - -Only allow building on Linux hosts. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Liam Merwick -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 10 ++++++++++ - Makefile.objs | 1 + - tools/virtiofsd/Makefile.objs | 9 +++++++++ - 3 files changed, 20 insertions(+) - create mode 100644 tools/virtiofsd/Makefile.objs - -diff --git a/Makefile b/Makefile -index 4254950..1526775 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,6 +330,10 @@ endif - endif - endif - -+ifdef CONFIG_LINUX -+HELPERS-y += virtiofsd$(EXESUF) -+endif -+ - # Sphinx does not allow building manuals into the same directory as - # the source files, so if we're doing an in-tree QEMU build we must - # build the manuals into a subdirectory (and then install them from -@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ - elf2dmp-obj-y \ - ivshmem-client-obj-y \ - ivshmem-server-obj-y \ -+ virtiofsd-obj-y \ - rdmacm-mux-obj-y \ - libvhost-user-obj-y \ - vhost-user-scsi-obj-y \ -@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - -+ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) -+ $(call LINK, $^) -+endif -+ - vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a - $(call LINK, $^) - -diff --git a/Makefile.objs b/Makefile.objs -index fcf63e1..1a8f288 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ - rdmacm-mux-obj-y = contrib/rdmacm-mux/ - vhost-user-input-obj-y = contrib/vhost-user-input/ - vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ -+virtiofsd-obj-y = tools/virtiofsd/ - - ###################################################################### - trace-events-subdirs = -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -new file mode 100644 -index 0000000..45a8075 ---- /dev/null -+++ 
b/tools/virtiofsd/Makefile.objs -@@ -0,0 +1,9 @@ -+virtiofsd-obj-y = buffer.o \ -+ fuse_opt.o \ -+ fuse_log.o \ -+ fuse_lowlevel.o \ -+ fuse_signals.o \ -+ fuse_virtio.o \ -+ helper.o \ -+ passthrough_ll.o -+ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/kvm-virtiofsd-Add-auxiliary-.c-s.patch deleted file mode 100644 index 90150d9..0000000 --- a/kvm-virtiofsd-Add-auxiliary-.c-s.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:41 +0100 -Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-7-dgilbert@redhat.com> -Patchwork-id: 93461 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add most of the non-main .c files we need from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_log.c | 40 ++++ - tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_signals.c | 91 +++++++++ - tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 1315 insertions(+) - create mode 100644 tools/virtiofsd/buffer.c - create mode 100644 tools/virtiofsd/fuse_log.c - create mode 100644 tools/virtiofsd/fuse_opt.c - create mode 100644 tools/virtiofsd/fuse_signals.c - create mode 100644 tools/virtiofsd/helper.c - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -new file mode 100644 -index 0000000..5ab9b87 ---- /dev/null -+++ b/tools/virtiofsd/buffer.c -@@ -0,0 +1,321 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2010 Miklos Szeredi -+ -+ Functions for dealing with `struct fuse_buf` and `struct -+ fuse_bufvec`. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_lowlevel.h" -+#include -+#include -+#include -+#include -+ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv) -+{ -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) -+ size = SIZE_MAX; -+ else -+ size += bufv->buf[i].size; -+ } -+ -+ return size; -+} -+ -+static size_t min_size(size_t s1, size_t s2) -+{ -+ return s1 < s2 ? 
s1 : s2; -+} -+ -+static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ src_off += res; -+ dst_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ 
break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ -+ if (res < this_len) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+#ifdef HAVE_SPLICE -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int splice_flags = 0; -+ off_t *srcpos = NULL; -+ off_t *dstpos = NULL; -+ off_t srcpos_val; -+ off_t dstpos_val; -+ ssize_t res; -+ size_t copied = 0; -+ -+ if (flags & FUSE_BUF_SPLICE_MOVE) -+ splice_flags |= SPLICE_F_MOVE; -+ if (flags & FUSE_BUF_SPLICE_NONBLOCK) -+ splice_flags |= SPLICE_F_NONBLOCK; -+ -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ srcpos_val = src->pos + src_off; -+ srcpos = &srcpos_val; -+ } -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ dstpos_val = dst->pos + dst_off; -+ dstpos = &dstpos_val; -+ } -+ -+ while (len) { -+ res = splice(src->fd, srcpos, dst->fd, dstpos, len, -+ splice_flags); -+ if (res == -1) { -+ if (copied) -+ break; -+ -+ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -+ return -errno; -+ -+ /* Maybe splice is not supported for this combination */ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -+ len); -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY) && -+ !(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ len -= res; -+ } -+ -+ return copied; -+} -+#else -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ (void) flags; -+ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+} -+#endif -+ -+ -+static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) -+ memcpy(dstmem, srcmem, len); -+ else -+ memmove(dstmem, srcmem, len); -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else if (flags & FUSE_BUF_NO_SPLICE) { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ } -+} -+ -+static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) -+{ -+ if (bufv->idx < bufv->count) -+ return &bufv->buf[bufv->idx]; -+ else -+ return NULL; -+} -+ -+static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) -+{ -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) -+ return 0; -+ bufv->off = 0; -+ } -+ return 1; -+} -+ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -+ enum fuse_buf_copy_flags flags) -+{ -+ size_t copied = 0; -+ -+ if (dstv == srcv) -+ return fuse_buf_size(dstv); -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) -+ break; -+ -+ src_len = src->size - srcv->off; -+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) 
|| -+ !fuse_bufvec_advance(dstv, res)) -+ break; -+ -+ if (res < len) -+ break; -+ } -+ -+ return copied; -+} -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -new file mode 100644 -index 0000000..0d268ab ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.c -@@ -0,0 +1,40 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ Logging API. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "fuse_log.h" -+ -+#include -+#include -+ -+static void default_log_func( -+ __attribute__(( unused )) enum fuse_log_level level, -+ const char *fmt, va_list ap) -+{ -+ vfprintf(stderr, fmt, ap); -+} -+ -+static fuse_log_func_t log_func = default_log_func; -+ -+void fuse_set_log_func(fuse_log_func_t func) -+{ -+ if (!func) -+ func = default_log_func; -+ -+ log_func = func; -+} -+ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); -+} -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -new file mode 100644 -index 0000000..93066b9 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.c -@@ -0,0 +1,423 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of option parsing routines (dealing with `struct -+ fuse_args`). -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+ -+#include -+#include -+#include -+#include -+ -+struct fuse_opt_context { -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; -+}; -+ -+void fuse_opt_free_args(struct fuse_args *args) -+{ -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) -+ free(args->argv[i]); -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } -+} -+ -+static int alloc_failed(void) -+{ -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+} -+ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg) -+{ -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) -+ return alloc_failed(); -+ -+ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; -+} -+ -+static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -+ const char *arg) -+{ -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) -+ return -1; -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; -+} -+ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) -+{ -+ return fuse_opt_insert_arg_common(args, pos, arg); -+} -+ -+static int next_arg(struct fuse_opt_context *ctx, const char *opt) -+{ -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; -+} -+ -+static int add_arg(struct fuse_opt_context *ctx, const char *arg) -+{ -+ return fuse_opt_add_arg(&ctx->outargs, arg); -+} -+ -+static int add_opt_common(char **opts, const char *opt, int esc) -+{ -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) -+ return alloc_failed(); -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) -+ *d++ = '\\'; -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; -+} -+ -+int fuse_opt_add_opt(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 0); -+} -+ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 1); -+} -+ -+static int add_opt(struct fuse_opt_context *ctx, const char *opt) -+{ -+ return add_opt_common(&ctx->opts, opt, 1); -+} -+ -+static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -+ int iso) -+{ -+ if (key == FUSE_OPT_KEY_DISCARD) -+ return 0; -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) -+ return res; -+ } -+ if (iso) -+ return add_opt(ctx, arg); -+ else -+ return add_arg(ctx, arg); -+} -+ -+static int match_template(const char *t, const char *arg, unsigned *sepp) -+{ -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? 
sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') -+ tlen ++; -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; -+} -+ -+static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -+ const char *arg, unsigned *sepp) -+{ -+ for (; opt && opt->templ; opt++) -+ if (match_template(opt->templ, arg, sepp)) -+ return opt; -+ return NULL; -+} -+ -+int fuse_opt_match(const struct fuse_opt *opts, const char *opt) -+{ -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 1 : 0; -+} -+ -+static int process_opt_param(void *var, const char *format, const char *param, -+ const char *arg) -+{ -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) -+ return alloc_failed(); -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+static int process_opt(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) -+ return -1; -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') -+ param ++; -+ if (process_opt_param(var, opt->templ + sep + 1, -+ param, arg) == -1) -+ return -1; -+ } else -+ *(int *)var = opt->value; -+ } -+ return 0; -+} -+ -+static int process_opt_sep_arg(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) -+ return -1; -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 
1); -+ if (!newarg) -+ return alloc_failed(); -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; -+} -+ -+static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) -+{ -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) -+ res = process_opt_sep_arg(ctx, opt, sep, arg, -+ iso); -+ else -+ res = process_opt(ctx, opt, sep, arg, iso); -+ if (res == -1) -+ return -1; -+ } -+ return 0; -+ } else -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+} -+ -+static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) -+{ -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') -+ end = 1; -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) -+ return -1; -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && -+ s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + -+ (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; -+} -+ -+static int process_option_group(struct fuse_opt_context *ctx, const char *opts) -+{ -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; -+} -+ -+static int process_one(struct fuse_opt_context *ctx, const char *arg) -+{ -+ if (ctx->nonopt || arg[0] != '-') -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ else if (arg[1] == 'o') { -+ if (arg[2]) -+ return process_option_group(ctx, arg + 2); -+ else { -+ if 
(next_arg(ctx, arg) == -1) -+ return -1; -+ -+ return process_option_group(ctx, -+ ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) -+ return -1; -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else -+ return process_gopt(ctx, arg, 0); -+} -+ -+static int opt_parse(struct fuse_opt_context *ctx) -+{ -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) -+ return -1; -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -+ return -1; -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -+ return -1; -+ } -+ -+ /* If option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; -+} -+ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) -+{ -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) -+ return 0; -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; -+} -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -new file mode 100644 -index 0000000..4271947 ---- /dev/null -+++ b/tools/virtiofsd/fuse_signals.c -@@ -0,0 +1,91 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Utility functions for setting signal handlers. 
-+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_lowlevel.h" -+#include "fuse_i.h" -+ -+#include -+#include -+#include -+#include -+ -+static struct fuse_session *fuse_instance; -+ -+static void exit_handler(int sig) -+{ -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if(sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } -+} -+ -+static void do_nothing(int sig) -+{ -+ (void) sig; -+} -+ -+static int set_one_signal_handler(int sig, void (*handler)(int), int remove) -+{ -+ struct sigaction sa; -+ struct sigaction old_sa; -+ -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; -+ -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } -+ -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; -+} -+ -+int fuse_set_signal_handlers(struct fuse_session *se) -+{ -+ /* If we used SIG_IGN instead of the do_nothing function, -+ then we would be unable to tell if we set SIG_IGN (and -+ thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ or if it was already set to SIG_IGN (and should be left -+ untouched. 
*/ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -+ return -1; -+ -+ fuse_instance = se; -+ return 0; -+} -+ -+void fuse_remove_signal_handlers(struct fuse_session *se) -+{ -+ if (fuse_instance != se) -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ else -+ fuse_instance = NULL; -+ -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); -+} -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -new file mode 100644 -index 0000000..64ff7ad ---- /dev/null -+++ b/tools/virtiofsd/helper.c -@@ -0,0 +1,440 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Helper functions to create (simple) standalone programs. With the -+ aid of these functions it should be possible to create full FUSE -+ file system by implementing nothing but the request handlers. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_lowlevel.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define FUSE_HELPER_OPT(t, p) \ -+ { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+ -+static const struct fuse_opt fuse_helper_opts[] = { -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("-s", singlethread), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+#ifndef __FreeBSD__ -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -+#endif -+ FUSE_HELPER_OPT("clone_fd", clone_fd), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END -+}; -+ -+struct fuse_conn_info_opts { -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; -+}; -+ -+#define CONN_OPTION(t, p, v) \ -+ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } -+static const struct fuse_opt conn_info_opt_spec[] = { -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END -+}; -+ -+ -+void fuse_cmdline_help(void) 
-+{ -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -s disable multi-threaded operation\n" -+ " -o clone_fd use separate fuse device fd for each thread\n" -+ " (may improve performance)\n" -+ " -o max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); -+} -+ -+static int fuse_helper_opt_proc(void *data, const char *arg, int key, -+ struct fuse_args *outargs) -+{ -+ (void) outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: bad mount point `%s': %s\n", -+ arg, strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } -+} -+ -+/* Under FreeBSD, there is no subtype option so this -+ function actually sets the fsname */ -+static int add_default_subtype(const char *progname, struct fuse_args *args) -+{ -+ int res; -+ char *subtype_opt; -+ -+ const char *basename = strrchr(progname, '/'); -+ if (basename == NULL) -+ basename = progname; -+ else if (basename[1] != '\0') -+ basename++; -+ -+ subtype_opt = (char *) malloc(strlen(basename) + 64); -+ if (subtype_opt == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+#ifdef __FreeBSD__ -+ sprintf(subtype_opt, "-ofsname=%s", basename); -+#else -+ sprintf(subtype_opt, "-osubtype=%s", basename); -+#endif -+ res = fuse_opt_add_arg(args, subtype_opt); -+ free(subtype_opt); -+ return res; -+} -+ -+int fuse_parse_cmdline(struct fuse_args *args, -+ 
struct fuse_cmdline_opts *opts) -+{ -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ -+ opts->max_idle_threads = 10; -+ -+ if (fuse_opt_parse(args, opts, fuse_helper_opts, -+ fuse_helper_opt_proc) == -1) -+ return -1; -+ -+ /* *Linux*: if neither -o subtype nor -o fsname are specified, -+ set subtype to program's basename. -+ *FreeBSD*: if fsname is not specified, set to program's -+ basename. */ -+ if (!opts->nodefault_subtype) -+ if (add_default_subtype(args->argv[0], args) == -1) -+ return -1; -+ -+ return 0; -+} -+ -+ -+int fuse_daemonize(int foreground) -+{ -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. -+ */ -+ switch(fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void) read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void) chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void) dup2(nullfd, 0); -+ (void) dup2(nullfd, 1); -+ (void) dup2(nullfd, 2); -+ if (nullfd > 2) -+ close(nullfd); -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void) write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void) chdir("/"); -+ } -+ return 0; -+} -+ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *user_data) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse *fuse; -+ struct fuse_cmdline_opts opts; -+ int res; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ -+ if (opts.show_version) { -+ printf("FUSE library version %s\n", PACKAGE_VERSION); -+ 
fuse_lowlevel_version(); -+ res = 0; -+ goto out1; -+ } -+ -+ if (opts.show_help) { -+ if(args.argv[0][0] != '\0') -+ printf("usage: %s [options] \n\n", -+ args.argv[0]); -+ printf("FUSE options:\n"); -+ fuse_cmdline_help(); -+ fuse_lib_help(&args); -+ res = 0; -+ goto out1; -+ } -+ -+ if (!opts.show_help && -+ !opts.mountpoint) { -+ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -+ res = 2; -+ goto out1; -+ } -+ -+ -+ fuse = fuse_new_31(&args, op, op_size, user_data); -+ if (fuse == NULL) { -+ res = 3; -+ goto out1; -+ } -+ -+ if (fuse_mount(fuse,opts.mountpoint) != 0) { -+ res = 4; -+ goto out2; -+ } -+ -+ if (fuse_daemonize(opts.foreground) != 0) { -+ res = 5; -+ goto out3; -+ } -+ -+ struct fuse_session *se = fuse_get_session(fuse); -+ if (fuse_set_signal_handlers(se) != 0) { -+ res = 6; -+ goto out3; -+ } -+ -+ if (opts.singlethread) -+ res = fuse_loop(fuse); -+ else { -+ struct fuse_loop_config loop_config; -+ loop_config.clone_fd = opts.clone_fd; -+ loop_config.max_idle_threads = opts.max_idle_threads; -+ res = fuse_loop_mt_32(fuse, &loop_config); -+ } -+ if (res) -+ res = 7; -+ -+ fuse_remove_signal_handlers(se); -+out3: -+ fuse_unmount(fuse); -+out2: -+ fuse_destroy(fuse); -+out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ return res; -+} -+ -+ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn) -+{ -+ if(opts->set_max_write) -+ conn->max_write = opts->max_write; -+ if(opts->set_max_background) -+ conn->max_background = opts->max_background; -+ if(opts->set_congestion_threshold) -+ conn->congestion_threshold = opts->congestion_threshold; -+ if(opts->set_time_gran) -+ conn->time_gran = opts->time_gran; -+ if(opts->set_max_readahead) -+ conn->max_readahead = opts->max_readahead; -+ -+#define LL_ENABLE(cond,cap) \ -+ if (cond) conn->want |= (cap) -+#define LL_DISABLE(cond,cap) \ -+ if (cond) conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+} -+ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+{ -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if(opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; -+} -+ -+int fuse_open_channel(const char *mountpoint, const char* options) -+{ -+ struct mount_opts *opts = NULL; -+ int fd = -1; -+ const char *argv[] = { "", "-o", options }; -+ int argc = sizeof(argv) / sizeof(argv[0]); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -+ -+ opts = parse_mount_opts(&args); -+ if (opts == NULL) -+ return -1; -+ -+ fd = fuse_kern_mount(mountpoint, opts); -+ destroy_mount_opts(opts); -+ -+ return fd; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/kvm-virtiofsd-Add-fuse_lowlevel.c.patch deleted file mode 100644 index 1318fef..0000000 --- a/kvm-virtiofsd-Add-fuse_lowlevel.c.patch +++ /dev/null @@ -1,3172 +0,0 @@ -From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:42 +0100 -Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-8-dgilbert@redhat.com> -Patchwork-id: 93456 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -fuse_lowlevel is one of the largest files from the library -and does most of the work. Add it separately to keep the diff -sizes small. -Again this is from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 3129 insertions(+) - create mode 100644 tools/virtiofsd/fuse_lowlevel.c - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -new file mode 100644 -index 0000000..f2d7038 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -0,0 +1,3129 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of (most of) the low-level FUSE API. The session loop -+ functions are implemented in separate files. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_kernel.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifndef F_LINUX_SPECIFIC_BASE -+#define F_LINUX_SPECIFIC_BASE 1024 -+#endif -+#ifndef F_SETPIPE_SZ -+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) -+#endif -+ -+ -+#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -+#define OFFSET_MAX 0x7fffffffffffffffLL -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct fuse_pollhandle { -+ uint64_t kh; -+ struct fuse_session *se; -+}; -+ -+static size_t pagesize; -+ -+static __attribute__((constructor)) void fuse_ll_init_pagesize(void) -+{ -+ pagesize = getpagesize(); -+} -+ -+static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) -+{ -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); -+} -+ -+static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -+{ -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+} -+ -+static size_t iov_length(const struct iovec *iov, size_t count) -+{ -+ size_t seg; -+ size_t ret = 0; -+ -+ for (seg = 0; seg < count; seg++) -+ ret += iov[seg].iov_len; -+ return ret; -+} -+ -+static void list_init_req(struct fuse_req *req) -+{ -+ req->next = req; -+ req->prev = req; -+} -+ -+static void list_del_req(struct fuse_req *req) -+{ -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_req(struct fuse_req *req, struct fuse_req *next) -+{ -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; -+} -+ -+static void destroy_req(fuse_req_t req) -+{ -+ pthread_mutex_destroy(&req->lock); -+ free(req); -+} -+ -+void fuse_free_req(fuse_req_t req) -+{ -+ int ctr; -+ struct fuse_session *se = req->se; -+ -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ fuse_chan_put(req->ch); -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) -+ destroy_req(req); -+} -+ -+static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) -+{ -+ struct fuse_req *req; -+ -+ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } -+ -+ return req; -+} -+ -+/* Send data. 
If *ch* is NULL, send via session master fd */ -+static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header *out = iov[0].iov_base; -+ -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -+ out->error, out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long) out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ } -+ -+ ssize_t res = writev(ch ? ch->fd : se->fd, -+ iov, count); -+ int err = errno; -+ -+ if (res == -1) { -+ assert(se != NULL); -+ -+ /* ENOENT means the operation was interrupted */ -+ if (!fuse_session_exited(se) && err != ENOENT) -+ perror("fuse: writing device"); -+ return -err; -+ } -+ -+ return 0; -+} -+ -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ struct fuse_out_header out; -+ -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } -+ -+ out.unique = req->unique; -+ out.error = error; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(req->se, req->ch, iov, count); -+} -+ -+static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ int res; -+ -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; -+} -+ -+static int send_reply(fuse_req_t req, int error, const void *arg, -+ size_t argsize) -+{ -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *) arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); -+} -+ 
-+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) -+{ -+ int res; -+ struct iovec *padded_iov; -+ -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; -+ -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); -+ -+ return res; -+} -+ -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ dirent = (struct fuse_dirent*) buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void convert_statfs(const struct statvfs *stbuf, -+ struct fuse_kstatfs *kstatfs) -+{ -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; -+} -+ -+static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -+{ -+ return send_reply(req, 0, arg, argsize); -+} -+ -+int fuse_reply_err(fuse_req_t req, int err) -+{ -+ return send_reply(req, -err, NULL, 0); -+} -+ -+void fuse_reply_none(fuse_req_t req) -+{ -+ fuse_free_req(req); -+} -+ -+static unsigned long 
calc_timeout_sec(double t) -+{ -+ if (t > (double) ULONG_MAX) -+ return ULONG_MAX; -+ else if (t < 0.0) -+ return 0; -+ else -+ return (unsigned long) t; -+} -+ -+static unsigned int calc_timeout_nsec(double t) -+{ -+ double f = t - (double) calc_timeout_sec(t); -+ if (f < 0.0) -+ return 0; -+ else if (f >= 0.999999999) -+ return 999999999; -+ else -+ return (unsigned int) (f * 1.0e9); -+} -+ -+static void fill_entry(struct fuse_entry_out *arg, -+ const struct fuse_entry_param *e) -+{ -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); -+} -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, -+ const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) -+ arg->open_flags |= 
FOPEN_DIRECT_IO; -+ if (f->keep_cache) -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ if (f->cache_readdir) -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ if (f->nonseekable) -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+} -+ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) -+{ -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ -+ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ negative entry */ -+ if (!e->ino && req->se->conn.proto_minor < 4) -+ return fuse_reply_err(req, ENOENT); -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *f) -+{ -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, -+ entrysize + sizeof(struct fuse_open_out)); -+} -+ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout) -+{ -+ struct fuse_attr_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
-+ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_readlink(fuse_req_t req, const char *linkname) -+{ -+ return send_reply_ok(req, linkname, strlen(linkname)); -+} -+ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) -+{ -+ struct fuse_open_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_write(fuse_req_t req, size_t count) -+{ -+ struct fuse_write_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) -+{ -+ return send_reply_ok(req, buf, size); -+} -+ -+static int fuse_send_data_iov_fallback(struct fuse_session *se, -+ struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, -+ size_t len) -+{ -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ int res; -+ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* FIXME: also avoid memory copy if there are multiple buffers -+ but none of them contain an fd */ -+ -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ return res; -+ -+ mem_buf.buf[0].mem = mbuf; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res < 0) { -+ free(mbuf); -+ return -res; -+ } -+ len = res; -+ -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ -+ return res; -+} -+ 
-+struct fuse_ll_pipe { -+ size_t size; -+ int can_grow; -+ int pipe[2]; -+}; -+ -+static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) -+{ -+ close(llp->pipe[0]); -+ close(llp->pipe[1]); -+ free(llp); -+} -+ -+#ifdef HAVE_SPLICE -+#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) -+static int fuse_pipe(int fds[2]) -+{ -+ int rv = pipe(fds); -+ -+ if (rv == -1) -+ return rv; -+ -+ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -+ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -+ close(fds[0]); -+ close(fds[1]); -+ rv = -1; -+ } -+ return rv; -+} -+#else -+static int fuse_pipe(int fds[2]) -+{ -+ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); -+} -+#endif -+ -+static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp == NULL) { -+ int res; -+ -+ llp = malloc(sizeof(struct fuse_ll_pipe)); -+ if (llp == NULL) -+ return NULL; -+ -+ res = fuse_pipe(llp->pipe); -+ if (res == -1) { -+ free(llp); -+ return NULL; -+ } -+ -+ /* -+ *the default size is 16 pages on linux -+ */ -+ llp->size = pagesize * 16; -+ llp->can_grow = 1; -+ -+ pthread_setspecific(se->pipe_key, llp); -+ } -+ -+ return llp; -+} -+#endif -+ -+static void fuse_ll_clear_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp) { -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ } -+} -+ -+#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) -+static int read_back(int fd, char *buf, size_t len) -+{ -+ int res; -+ -+ res = read(fd, buf, len); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -+ return -EIO; -+ } -+ if (res != len) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static int 
grow_pipe_to_max(int pipefd) -+{ -+ int max; -+ int res; -+ int maxfd; -+ char buf[32]; -+ -+ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -+ if (maxfd < 0) -+ return -errno; -+ -+ res = read(maxfd, buf, sizeof(buf) - 1); -+ if (res < 0) { -+ int saved_errno; -+ -+ saved_errno = errno; -+ close(maxfd); -+ return -saved_errno; -+ } -+ close(maxfd); -+ buf[res] = '\0'; -+ -+ max = atoi(buf); -+ res = fcntl(pipefd, F_SETPIPE_SZ, max); -+ if (res < 0) -+ return -errno; -+ return max; -+} -+ -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ int res; -+ size_t len = fuse_buf_size(buf); -+ struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_ll_pipe *llp; -+ int splice_flags; -+ size_t pipesize; -+ size_t total_fd_size; -+ size_t idx; -+ size_t headerlen; -+ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -+ -+ if (se->broken_splice_nonblock) -+ goto fallback; -+ -+ if (flags & FUSE_BUF_NO_SPLICE) -+ goto fallback; -+ -+ total_fd_size = 0; -+ for (idx = buf->idx; idx < buf->count; idx++) { -+ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -+ total_fd_size = buf->buf[idx].size; -+ if (idx == buf->idx) -+ total_fd_size -= buf->off; -+ } -+ } -+ if (total_fd_size < 2 * pagesize) -+ goto fallback; -+ -+ if (se->conn.proto_minor < 14 || -+ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ -+ headerlen = iov_length(iov, iov_count); -+ -+ out->len = headerlen + len; -+ -+ /* -+ * Heuristic for the required pipe size, does not work if the -+ * source contains less than page size fragments -+ */ -+ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -+ -+ if (llp->size < pipesize) { -+ if (llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -+ if (res == -1) { -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ 
llp->size = res; -+ llp->can_grow = 0; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < pipesize) -+ goto fallback; -+ } -+ -+ -+ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -+ if (res == -1) -+ goto fallback; -+ -+ if (res != headerlen) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -+ headerlen); -+ goto clear_pipe; -+ } -+ -+ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -+ pipe_buf.buf[0].fd = llp->pipe[1]; -+ -+ res = fuse_buf_copy(&pipe_buf, buf, -+ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -+ if (res < 0) { -+ if (res == -EAGAIN || res == -EINVAL) { -+ /* -+ * Should only get EAGAIN on kernels with -+ * broken SPLICE_F_NONBLOCK support (<= -+ * 2.6.35) where this error or a short read is -+ * returned even if the pipe itself is not -+ * full -+ * -+ * EINVAL might mean that splice can't handle -+ * this combination of input and output. -+ */ -+ if (res == -EAGAIN) -+ se->broken_splice_nonblock = 1; -+ -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ goto fallback; -+ } -+ res = -res; -+ goto clear_pipe; -+ } -+ -+ if (res != 0 && res < len) { -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ size_t now_len = res; -+ /* -+ * For regular files a short count is either -+ * 1) due to EOF, or -+ * 2) because of broken SPLICE_F_NONBLOCK (see above) -+ * -+ * For other inputs it's possible that we overflowed -+ * the pipe because of small buffer fragments. -+ */ -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ goto clear_pipe; -+ -+ mem_buf.buf[0].mem = mbuf; -+ mem_buf.off = now_len; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res > 0) { -+ char *tmpbuf; -+ size_t extra_len = res; -+ /* -+ * Trickiest case: got more data. Need to get -+ * back the data from the pipe and then fall -+ * back to regular write. 
-+ */ -+ tmpbuf = malloc(headerlen); -+ if (tmpbuf == NULL) { -+ free(mbuf); -+ res = ENOMEM; -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], tmpbuf, headerlen); -+ free(tmpbuf); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], mbuf, now_len); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ len = now_len + extra_len; -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ return res; -+ } -+ free(mbuf); -+ res = now_len; -+ } -+ len = res; -+ out->len = headerlen + len; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i (splice)\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ -+ splice_flags = 0; -+ if ((flags & FUSE_BUF_SPLICE_MOVE) && -+ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -+ splice_flags |= SPLICE_F_MOVE; -+ -+ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -+ NULL, out->len, splice_flags); -+ if (res == -1) { -+ res = -errno; -+ perror("fuse: splice from pipe"); -+ goto clear_pipe; -+ } -+ if (res != out->len) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -+ res, out->len); -+ goto clear_pipe; -+ } -+ return 0; -+ -+clear_pipe: -+ fuse_ll_clear_pipe(se); -+ return res; -+ -+fallback: -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#else -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ size_t len = fuse_buf_size(buf); -+ (void) flags; -+ -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#endif -+ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct 
fuse_out_header); -+ -+ out.unique = req->unique; -+ out.error = 0; -+ -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } -+} -+ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) -+{ -+ struct fuse_statfs_out arg; -+ size_t size = req->se->conn.proto_minor < 4 ? -+ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_xattr(fuse_req_t req, size_t count) -+{ -+ struct fuse_getxattr_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock) -+{ -+ struct fuse_lk_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) -+ arg.lk.end = OFFSET_MAX; -+ else -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx) -+{ -+ struct fuse_bmap_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) -+ return NULL; -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t) iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = 
NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); -+out: -+ free(in_fiov); -+ free(out_fiov); -+ -+ return res; -+ -+enomem: -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; -+} -+ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) -+{ -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (size) { -+ iov[count].iov_base = (char *) buf; -+ iov[count].iov_len = size; -+ count++; -+ } -+ -+ return send_reply_iov(req, 0, iov, count); -+} -+ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count) 
-+{ -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; -+ -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); -+ -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); -+ -+ return res; -+} -+ -+int fuse_reply_poll(fuse_req_t req, unsigned revents) -+{ -+ struct fuse_poll_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lseek(fuse_req_t req, off_t off) -+{ -+ struct fuse_lseek_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.lookup) -+ req->se->op.lookup(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ -+ if (req->se->op.forget) -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ else -+ fuse_reply_none(req); -+} -+ -+static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg) -+{ -+ struct fuse_batch_forget_in *arg = (void *) inarg; -+ struct fuse_forget_one *param = (void *) PARAM(arg); -+ unsigned int i; -+ -+ (void) nodeid; -+ -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *) param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = ¶m[i]; -+ struct fuse_req *dummy_req; -+ -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == 
NULL) -+ break; -+ -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; -+ -+ req->se->op.forget(dummy_req, forget->nodeid, -+ forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } -+} -+ -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; -+ -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } -+ -+ if (req->se->op.getattr) -+ req->se->op.getattr(req, nodeid, fip); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= -+ FUSE_SET_ATTR_MODE | -+ FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | -+ FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | -+ FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | -+ FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ -+ if (req->se->op.access) -+ req->se->op.access(req, nodeid, arg->mask); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) inarg; -+ -+ if 
(req->se->op.readlink) -+ req->se->op.readlink(req, nodeid); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -+ char *name = PARAM(arg); -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ -+ if (req->se->op.mknod) -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ -+ if (req->se->op.mkdir) -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.unlink) -+ req->se->op.unlink(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.rmdir) -+ req->se->op.rmdir(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ -+ if (req->se->op.symlink) -+ req->se->op.symlink(req, linkname, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, -+ 0); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ -+ if (req->se->op.link) -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + sizeof(struct fuse_open_in); -+ -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.open) -+ req->se->op.open(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ -+ if (req->se->op.read) { -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ char *param; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } -+ -+ if (req->se->op.write) -+ req->se->op.write(req, nodeid, param, arg->size, -+ arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ -+out: -+ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) -+ fi.lock_owner = arg->lock_owner; -+ -+ if (req->se->op.flush) -+ req->se->op.flush(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } -+ -+ if (req->se->op.release) -+ req->se->op.release(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsync) -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.opendir) -+ req->se->op.opendir(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdir) -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdirplus) -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ -+ if (req->se->op.releasedir) -+ req->se->op.releasedir(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsyncdir) -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) nodeid; -+ (void) inarg; -+ -+ if (req->se->op.statfs) -+ req->se->op.statfs(req, nodeid); -+ else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } -+} -+ -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; -+ -+ if (req->se->op.setxattr) -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.getxattr) -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.listxattr) -+ req->se->op.listxattr(req, nodeid, arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.removexattr) -+ req->se->op.removexattr(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void convert_fuse_file_lock(struct fuse_file_lock *fl, -+ struct flock *flock) -+{ -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) -+ flock->l_len = 0; -+ else -+ flock->l_len = fl->end - fl->start + 1; -+ flock->l_pid = fl->pid; -+} -+ -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ 
struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) -+ op |= LOCK_NB; -+ -+ if (req->se->op.flock) -+ req->se->op.flock(req, nodeid, &fi, op); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 0); -+} -+ -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 1); -+} -+ -+static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) -+ func(curr, data); -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) -+ destroy_req(curr); -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) -+ return 1; -+ } -+ return 0; -+} -+ -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -+ struct 
fuse_session *se = req->se; -+ -+ (void) nodeid; -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long) arg->unique); -+ -+ req->u.i.unique = arg->unique; -+ -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) -+ destroy_req(req); -+ else -+ list_add_req(req, &se->interrupts); -+ pthread_mutex_unlock(&se->lock); -+} -+ -+static struct fuse_req *check_interrupt(struct fuse_session *se, -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else -+ return NULL; -+} -+ -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ -+ if (req->se->op.bmap) -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; -+ -+ if (flags & FUSE_IOCTL_DIR && -+ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } -+ -+ if (req->se->op.ioctl) -+ req->se->op.ioctl(req, nodeid, arg->cmd, -+ (void *)(uintptr_t)arg->arg, &fi, flags, -+ in_buf, arg->in_size, arg->out_size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) -+{ -+ free(ph); -+} -+ -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; -+ -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; -+ -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } -+ -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fallocate) -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+{ -+ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -+ struct fuse_file_info fi_in, fi_out; -+ -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; -+ -+ 
memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; -+ -+ -+ if (req->se->op.copy_file_range) -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -+ &fi_in, arg->nodeid_out, -+ arg->off_out, &fi_out, arg->len, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.lseek) -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void) nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) -+ se->conn.max_readahead = arg->max_readahead; -+ if (arg->flags & FUSE_ASYNC_READ) -+ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ if (arg->flags & FUSE_POSIX_LOCKS) -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ if (arg->flags & FUSE_EXPORT_SUPPORT) -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ if (arg->flags & FUSE_DONT_MASK) -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ if (arg->flags & FUSE_FLOCK_LOCKS) -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ if (arg->flags & FUSE_DO_READDIRPLUS) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ if (arg->flags & FUSE_ASYNC_DIO) -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ if (arg->flags & FUSE_WRITEBACK_CACHE) -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ if (arg->flags & FUSE_PARALLEL_DIROPS) -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ if (arg->flags & FUSE_POSIX_ACL) -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -+ + FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { -+#ifdef HAVE_SPLICE -+#ifdef HAVE_VMSPLICE -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+#endif -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; -+#endif -+ } -+ if (se->conn.proto_minor >= 18) -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ -+ /* Default settings for modern filesystems. 
-+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -+ FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ -+ se->got_init = 1; -+ if (se->op.init) -+ se->op.init(se->userdata, &se->conn); -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ unsigned max_read_mo = get_max_read(se->mo); -+ if (se->conn.max_read != max_read_mo) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -+ "requested different maximum read size (%u vs %u)\n", -+ se->conn.max_read, 
max_read_mo); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* Always enable big writes, this is superseded -+ by the max_write option */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) -+ outarg.flags |= FUSE_ASYNC_READ; -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ if (se->conn.want & FUSE_CAP_DONT_MASK) -+ outarg.flags |= FUSE_DONT_MASK; -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) -+ outarg.flags |= FUSE_ASYNC_DIO; -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) -+ outarg.flags |= FUSE_POSIX_ACL; -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) -+ se->conn.max_background = (1 << 16) - 1; -+ if (se->conn.congestion_threshold > se->conn.max_background) -+ se->conn.congestion_threshold = se->conn.max_background; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = -+ se->conn.max_background * 3 / 4; -+ } -+ -+ 
outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) -+ outarg.time_gran = se->conn.time_gran; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -+ outarg.time_gran); -+ } -+ if (arg->minor < 5) -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ else if (arg->minor < 23) -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ -+ send_reply_ok(req, &outarg, outargsize); -+} -+ -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_session *se = req->se; -+ -+ (void) nodeid; -+ (void) inarg; -+ -+ se->got_destroy = 1; -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ -+ send_reply_ok(req, NULL, 0); -+} -+ -+static void list_del_nreq(struct fuse_notify_req *nreq) -+{ -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_nreq(struct fuse_notify_req *nreq, -+ struct fuse_notify_req *next) -+{ -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; -+} -+ -+static void list_init_nreq(struct fuse_notify_req *nreq) -+{ -+ nreq->next = nreq; -+ nreq->prev = nreq; -+} -+ -+static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, const struct fuse_buf *buf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req 
*head; -+ -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); -+ -+ if (nreq != head) -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+} -+ -+static int send_notify_iov(struct fuse_session *se, int notify_code, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header out; -+ -+ if (!se->got_init) -+ return -ENOTCONN; -+ -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(se, NULL, iov, count); -+} -+ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) -+{ -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; -+ -+ outarg.kh = ph->kh; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } -+} -+ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len) -+{ -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+} -+ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ 
iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+} -+ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+} -+ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; -+ -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) -+ res = -res; -+ -+ return res; -+} -+ -+struct fuse_retrieve_req { -+ struct fuse_notify_req nreq; -+ void *cookie; -+}; -+ -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -+ fuse_req_t req, fuse_ino_t ino, -+ const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct 
fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, -+ arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } -+out: -+ free(rreq); -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie) -+{ -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) -+ return -ENOMEM; -+ -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); -+ -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } -+ -+ return err; -+} -+ -+void *fuse_req_userdata(fuse_req_t req) -+{ -+ return req->se->userdata; -+} -+ -+const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) -+{ -+ return &req->ctx; -+} -+ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data) -+{ -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) -+ func(req, data); -+ pthread_mutex_unlock(&req->lock); -+} -+ -+int fuse_req_interrupted(fuse_req_t req) -+{ -+ int interrupted; -+ -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); -+ -+ return interrupted; -+} -+ -+static struct { -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; -+} fuse_ll_ops[] = { -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ [FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, -+}; -+ -+#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -+ -+static const char *opname(enum fuse_opcode opcode) -+{ -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -+ return "???"; -+ else -+ return fuse_ll_ops[opcode].name; -+} -+ -+static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -+ struct fuse_bufvec *src) -+{ -+ ssize_t res = fuse_buf_copy(dst, src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -+ return res; -+ } -+ if ((size_t)res < fuse_buf_size(dst)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ return -1; -+ } -+ return 0; -+} -+ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf) -+{ -+ fuse_session_process_buf_int(se, buf, NULL); -+} -+ -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch) -+{ -+ const size_t write_header_size = sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; -+ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ void *mbuf = NULL; -+ int err; -+ int res; -+ -+ if (buf->flags & FUSE_BUF_IS_FD) { -+ if (buf->size < tmpbuf.buf[0].size) -+ tmpbuf.buf[0].size = buf->size; -+ -+ mbuf = malloc(tmpbuf.buf[0].size); -+ if (mbuf == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -+ goto clear_pipe; -+ } -+ tmpbuf.buf[0].mem = mbuf; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ if (res < 0) -+ goto clear_pipe; -+ -+ in = mbuf; -+ } else { -+ in = buf->mem; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long) in->unique, -+ opname((enum fuse_opcode) in->opcode), in->opcode, -+ (unsigned long long) in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, &iov, 1); -+ goto clear_pipe; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch ? fuse_chan_get(ch) : NULL; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) -+ goto reply_err; -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -+ goto reply_err; -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && -+ in->opcode != FUSE_READDIRPLUS) -+ goto reply_err; -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -+ goto reply_err; -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) -+ fuse_reply_err(intr, EAGAIN); -+ } -+ -+ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -+ (in->opcode != FUSE_WRITE || !se->op.write_buf) && -+ in->opcode != FUSE_NOTIFY_REPLY) { -+ void *newmbuf; -+ -+ err = ENOMEM; -+ newmbuf = realloc(mbuf, buf->size); -+ if (newmbuf == NULL) -+ goto reply_err; -+ mbuf = newmbuf; -+ -+ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -+ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ err = -res; -+ if (res < 0) -+ goto reply_err; -+ -+ in = mbuf; -+ } -+ -+ inarg = (void *) &in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) -+ do_write_buf(req, in->nodeid, inarg, buf); -+ else if (in->opcode == FUSE_NOTIFY_REPLY) -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ else -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ -+out_free: -+ free(mbuf); -+ return; -+ -+reply_err: -+ fuse_reply_err(req, err); -+clear_pipe: -+ if (buf->flags & FUSE_BUF_IS_FD) -+ fuse_ll_clear_pipe(se); -+ goto out_free; 
-+} -+ -+#define LL_OPTION(n,o,v) \ -+ { n, offsetof(struct fuse_session, o), v } -+ -+static const struct fuse_opt fuse_ll_opts[] = { -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END -+}; -+ -+void fuse_lowlevel_version(void) -+{ -+ printf("using FUSE kernel interface version %i.%i\n", -+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ fuse_mount_version(); -+} -+ -+void fuse_lowlevel_help(void) -+{ -+ /* These are not all options, but the ones that are -+ potentially of interest to an end-user */ -+ printf( -+" -o allow_other allow access by all users\n" -+" -o allow_root allow access by root\n" -+" -o auto_unmount auto unmount on process termination\n"); -+} -+ -+void fuse_session_destroy(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp; -+ -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ } -+ llp = pthread_getspecific(se->pipe_key); -+ if (llp != NULL) -+ fuse_ll_pipe_free(llp); -+ pthread_key_delete(se->pipe_key); -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) -+ close(se->fd); -+ destroy_mount_opts(se->mo); -+ free(se); -+} -+ -+ -+static void fuse_ll_pipe_destructor(void *data) -+{ -+ struct fuse_ll_pipe *llp = data; -+ fuse_ll_pipe_free(llp); -+} -+ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) -+{ -+ return fuse_session_receive_buf_int(se, buf, NULL); -+} -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ int err; -+ ssize_t res; -+#ifdef HAVE_SPLICE -+ size_t bufsize = se->bufsize; -+ struct fuse_ll_pipe *llp; -+ struct fuse_buf tmpbuf; -+ -+ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ if (llp->size < bufsize) { -+ if 
(llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -+ if (res == -1) { -+ llp->can_grow = 0; -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ llp->size = res; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < bufsize) -+ goto fallback; -+ } -+ -+ res = splice(ch ? ch->fd : se->fd, -+ NULL, llp->pipe[1], NULL, bufsize, 0); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ -+ if (res == -1) { -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: splice from device"); -+ return -err; -+ } -+ -+ if (res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -+ return -EIO; -+ } -+ -+ tmpbuf = (struct fuse_buf) { -+ .size = res, -+ .flags = FUSE_BUF_IS_FD, -+ .fd = llp->pipe[0], -+ }; -+ -+ /* -+ * Don't bother with zero copy for small requests. -+ * fuse_loop_mt() needs to check for FORGET so this more than -+ * just an optimization. 
-+ */ -+ if (res < sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in) + pagesize) { -+ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -+ struct fuse_bufvec dst = { .count = 1 }; -+ -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ buf->size = se->bufsize; -+ buf->flags = 0; -+ dst.buf[0] = *buf; -+ -+ res = fuse_buf_copy(&dst, &src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -+ strerror(-res)); -+ fuse_ll_clear_pipe(se); -+ return res; -+ } -+ if (res < tmpbuf.size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ fuse_ll_clear_pipe(se); -+ return -EIO; -+ } -+ assert(res == tmpbuf.size); -+ -+ } else { -+ /* Don't overwrite buf->mem, as that would cause a leak */ -+ buf->fd = tmpbuf.fd; -+ buf->flags = tmpbuf.flags; -+ } -+ buf->size = tmpbuf.size; -+ -+ return res; -+ -+fallback: -+#endif -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ -+restart: -+ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ if (res == -1) { -+ /* ENOENT means the operation was interrupted, it's safe -+ to restart */ -+ if (err == ENOENT) -+ goto restart; -+ -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ /* Errors occurring during normal operation: EINTR (read -+ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -+ umounted) */ -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: reading device"); -+ return -err; -+ } -+ if ((size_t) res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -+ return -EIO; -+ } -+ -+ buf->size = res; -+ -+ return res; -+} -+ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ int err; -+ struct fuse_session *se; -+ struct mount_opts *mo; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -+ goto out2; -+ if(se->deny_others) { -+ /* Allowing access only by root is done by instructing -+ * kernel to allow access by everyone, and then restricting -+ * access to root and mountpoint owner in libfuse. 
-+ */ -+ // We may be adding the option a second time, but -+ // that doesn't hurt. -+ if(fuse_opt_add_arg(args, "-oallow_other") == -1) -+ goto out2; -+ } -+ mo = parse_mount_opts(args); -+ if (mo == NULL) -+ goto out3; -+ -+ if(args->argc == 1 && -+ args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if (args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for(i = 1; i < args->argc-1; i++) -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -+ if (err) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -+ strerror(err)); -+ goto out5; -+ } -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ se->mo = mo; -+ return se; -+ -+out5: -+ pthread_mutex_destroy(&se->lock); -+out4: -+ fuse_opt_free_args(args); -+out3: -+ free(mo); -+out2: -+ free(se); -+out1: -+ return NULL; -+} -+ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+{ -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. -+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) -+ close(fd); -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) -+ return -1; -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) -+ goto error_out; -+ -+ return 0; -+ -+error_out: -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; -+} -+ -+int fuse_session_fd(struct fuse_session *se) -+{ -+ return se->fd; -+} -+ -+void fuse_session_unmount(struct fuse_session *se) -+{ -+ if (se->mountpoint != NULL) { -+ fuse_kern_unmount(se->mountpoint, se->fd); -+ free(se->mountpoint); -+ se->mountpoint = NULL; -+ } -+} -+ -+#ifdef linux -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; -+ -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ -+retry: -+ buf = malloc(bufsize); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) -+ goto out_free; -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) -+ goto out_free; -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) -+ break; -+ -+ s = end; -+ if (ret < size) -+ list[ret] = val; -+ ret++; -+ } -+ -+out_free: -+ free(buf); -+ return ret; -+} -+#else /* linux */ -+/* -+ * This is currently not implemented on other than Linux... 
-+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ (void) req; (void) size; (void) list; -+ return -ENOSYS; -+} -+#endif -+ -+void fuse_session_exit(struct fuse_session *se) -+{ -+ se->exited = 1; -+} -+ -+void fuse_session_reset(struct fuse_session *se) -+{ -+ se->exited = 0; -+ se->error = 0; -+} -+ -+int fuse_session_exited(struct fuse_session *se) -+{ -+ return se->exited; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-main-virtio-loop.patch b/kvm-virtiofsd-Add-main-virtio-loop.patch deleted file mode 100644 index c0ba96a..0000000 --- a/kvm-virtiofsd-Add-main-virtio-loop.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:58 +0100 -Subject: [PATCH 027/116] virtiofsd: Add main virtio loop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-24-dgilbert@redhat.com> -Patchwork-id: 93475 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Processes incoming requests on the vhost-user fd. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 39 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2ae3c76..1928a20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,12 +11,14 @@ - * See the file COPYING.LIB - */ - -+#include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_virtio.h" - -+#include -+#include - #include - #include - #include -@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { - .queue_is_processed_in_order = fv_queue_order, - }; - -+/* -+ * Main loop; this mostly deals with events on the vhost-user -+ * socket itself, and not actual fuse data. -+ */ - int virtio_loop(struct fuse_session *se) - { - fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); - -- while (1) { -- /* TODO: Add stuffing */ -+ while (!fuse_session_exited(se)) { -+ struct pollfd pf[1]; -+ pf[0].fd = se->vu_socketfd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, -+ pf[0].revents); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -+ if (!vu_dispatch(&se->virtio_dev->dev)) { -+ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); -+ break; -+ } - } - - 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+ -+ return 0; - } - - int virtio_session_mount(struct fuse_session *se) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-options-for-virtio.patch b/kvm-virtiofsd-Add-options-for-virtio.patch deleted file mode 100644 index 8ac7fa7..0000000 --- a/kvm-virtiofsd-Add-options-for-virtio.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:54 +0100 -Subject: [PATCH 023/116] virtiofsd: Add options for virtio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-20-dgilbert@redhat.com> -Patchwork-id: 93473 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add options to specify parameters for virtio-fs paths, i.e. - - ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- - tools/virtiofsd/helper.c | 14 +++++++------- - 3 files changed, 16 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index bae0699..26b1a7d 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -63,6 +63,7 @@ struct fuse_session { - struct fuse_notify_req notify_list; - size_t bufsize; - int error; -+ char *vu_socket_path; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 8552cfb..17e8718 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2115,8 +2115,11 @@ reply_err: - } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("--socket-path=%s", vu_socket_path, 0), - FUSE_OPT_END - }; - -@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) - * These are not all options, but the ones that are - * potentially of interest to an end-user - */ -- printf(" -o allow_root allow access by root\n"); -+ printf( -+ " -o allow_root allow access by root\n" -+ " --socket-path=PATH path for the vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9333691..676032e 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { - - void fuse_cmdline_help(void) - { -- printf( -- " -h --help print help\n" -- " -V --version print version\n" 
-- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o max_idle_threads the maximum number of idle worker " -+ "threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-passthrough_ll.patch b/kvm-virtiofsd-Add-passthrough_ll.patch deleted file mode 100644 index 2510551..0000000 --- a/kvm-virtiofsd-Add-passthrough_ll.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:43 +0100 -Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-9-dgilbert@redhat.com> -Patchwork-id: 93462 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -passthrough_ll is one of the examples in the upstream fuse project -and is the main part of our daemon here. It passes through requests -from fuse to the underlying filesystem, using syscalls as directly -as possible. - ->From libfuse fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert - Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent - with the 'LICENSE' file in libfuse; patch sent to libfuse to fix - it upstream. -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 1338 insertions(+) - create mode 100644 tools/virtiofsd/passthrough_ll.c - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -new file mode 100644 -index 0000000..e1a6056 ---- /dev/null -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -0,0 +1,1338 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPLv2. -+ See the file COPYING. -+*/ -+ -+/** @file -+ * -+ * This file system mirrors the existing file system hierarchy of the -+ * system, starting at the root file system. This is implemented by -+ * just "passing through" all requests to the corresponding user-space -+ * libc functions. In contrast to passthrough.c and passthrough_fh.c, -+ * this implementation uses the low-level API. Its performance should -+ * be the least bad among the three, but many operations are not -+ * implemented. In particular, it is not possible to remove files (or -+ * directories) because the code necessary to defer actual removal -+ * until the file is not opened anymore would make the example much -+ * more complicated. -+ * -+ * When writeback caching is enabled (-o writeback mount option), it -+ * is only possible to write to files for which the mounting user has -+ * read permissions. This is because the writeback cache requires the -+ * kernel to be able to issue read requests for all files (which the -+ * passthrough filesystem cannot satisfy if it can't read the file in -+ * the underlying filesystem). 
-+ * -+ * Compile with: -+ * -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * -+ * ## Source code ## -+ * \include passthrough_ll.c -+ */ -+ -+#define _GNU_SOURCE -+#define FUSE_USE_VERSION 31 -+ -+#include "config.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "passthrough_helpers.h" -+ -+/* We are re-using pointers to our `struct lo_inode` and `struct -+ lo_dirp` elements as inodes. This means that we must be able to -+ store uintptr_t values in a fuse_ino_t variable. The following -+ incantation checks this condition at compile time. */ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -+ "fuse_ino_t too small to hold uintptr_t values!"); -+#else -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -+ { unsigned _uintptr_to_must_hold_fuse_ino_t: -+ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; -+#endif -+ -+struct lo_inode { -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ -+}; -+ -+enum { -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, -+}; -+ -+struct lo_data { -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ struct lo_inode root; /* protected by lo->mutex */ -+}; -+ -+static const struct fuse_opt lo_opts[] = { -+ { "writeback", -+ offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", -+ offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", -+ offsetof(struct lo_data, source), 0 }, -+ { "flock", -+ offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", -+ offsetof(struct lo_data, flock), 0 }, -+ { "xattr", -+ offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", -+ offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", -+ offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", -+ offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", -+ offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", -+ offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", -+ offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END -+}; -+ -+static struct lo_data *lo_data(fuse_req_t req) -+{ -+ return (struct lo_data *) fuse_req_userdata(req); -+} -+ -+static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) -+{ -+ if (ino == FUSE_ROOT_ID) -+ return &lo_data(req)->root; -+ else -+ return (struct lo_inode *) (uintptr_t) ino; -+} -+ -+static int lo_fd(fuse_req_t req, fuse_ino_t ino) -+{ -+ return lo_inode(req, ino)->fd; -+} -+ -+static bool lo_debug(fuse_req_t req) -+{ -+ return lo_data(req)->debug != 0; -+} -+ -+static void lo_init(void *userdata, -+ struct fuse_conn_info *conn) -+{ -+ struct lo_data *lo = 
(struct lo_data*) userdata; -+ -+ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ -+ if (lo->writeback && -+ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } -+} -+ -+static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); -+ -+ (void) fi; -+ -+ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fuse_reply_attr(req, &buf, lo->timeout); -+} -+ -+static int utimensat_empty_nofollow(struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); -+} -+ -+static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int valid, struct fuse_file_info *fi) -+{ -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
-+ attr->st_uid : (uid_t) -1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? -+ attr->st_gid : (gid_t) -1; -+ -+ res = fchownat(ifd, "", uid, gid, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) -+ tv[0].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_ATIME) -+ tv[0] = attr->st_atim; -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) -+ tv[1].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_MTIME) -+ tv[1] = attr->st_mtim; -+ -+ if (fi) -+ res = futimens(fi->fh, tv); -+ else -+ res = utimensat_empty_nofollow(inode, tv); -+ if (res == -1) -+ goto out_err; -+ } -+ -+ return lo_getattr(req, ino, fi); -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) -+{ -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; -+} -+ -+static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -+ struct fuse_entry_param *e) -+{ -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); -+ if (newfd == -1) -+ goto out_err; -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, sizeof(struct lo_inode)); -+ if (!inode) -+ goto out_err; -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, (unsigned long long) e->ino); -+ -+ return 0; -+ -+out_err: -+ saverr = errno; -+ if (newfd != -1) -+ close(newfd); -+ return saverr; -+} -+ -+static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_entry(req, &e); -+} -+ -+static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) -+{ -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; -+ -+ saverr = ENOMEM; -+ -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); -+ -+ saverr = errno; -+ if (res == -1) -+ goto out; -+ -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) -+ goto out; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) 
parent, name, (unsigned long long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev) -+{ -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+} -+ -+static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+} -+ -+static void lo_symlink(fuse_req_t req, const char *link, -+ fuse_ino_t parent, const char *name) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+} -+ -+static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -+ const char *name) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+} -+ -+static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) -+ goto out_err; -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, -+ (unsigned long 
long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) -+{ -+ int res; -+ -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ res = renameat(lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, 0); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+{ -+ if (!inode) -+ return; -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } -+} -+ -+static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long) ino, -+ (unsigned long long) inode->refcount, -+ (unsigned long long) nlookup); -+ } -+ -+ unref_inode(lo, inode, nlookup); -+} -+ -+static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); -+} -+ -+static void 
lo_forget_multi(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets) -+{ -+ int i; -+ -+ for (i = 0; i < count; i++) -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ fuse_reply_none(req); -+} -+ -+static void lo_readlink(fuse_req_t req, fuse_ino_t ino) -+{ -+ char buf[PATH_MAX + 1]; -+ int res; -+ -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ if (res == sizeof(buf)) -+ return (void) fuse_reply_err(req, ENAMETOOLONG); -+ -+ buf[res] = '\0'; -+ -+ fuse_reply_readlink(req, buf); -+} -+ -+struct lo_dirp { -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; -+}; -+ -+static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+{ -+ return (struct lo_dirp *) (uintptr_t) fi->fh; -+} -+ -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) -+ goto out_err; -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) -+ goto out_errno; -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) -+ goto out_errno; -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t) d; -+ if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+ return; -+ -+out_errno: -+ error = errno; -+out_err: -+ if (d) { -+ if (fd != -1) -+ close(fd); -+ free(d); -+ } -+ fuse_reply_err(req, error); -+} -+ -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && (name[1] == '\0' || -+ (name[1] == '.' 
&& name[2] == '\0')); -+} -+ -+static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi, int plus) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void) ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { // Error -+ err = errno; -+ goto error; -+ } else { // End of stream -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param) { -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) -+ goto error; -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, -+ &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, -+ &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) -+ lo_forget_one(req, entry_ino, 1); -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } -+ -+ err = 0; -+error: -+ // If there's an error, we can only signal it if we haven't stored -+ // any entries yet - otherwise we'd end up with wrong lookup -+ // counts for the entries that are already in the buffer. So we -+ // return what we've collected until that point. 
-+ if (err && rem == size) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_buf(req, buf, size - rem); -+ free(buf); -+} -+ -+static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 0); -+} -+ -+static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 1); -+} -+ -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ (void) ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi) -+{ -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ fd = openat(lo_fd(req, parent), name, -+ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_create(req, &e, fi); -+} -+ -+static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fd); -+ else -+ res = fsync(fd); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -+ ino, fi->flags); -+ -+ /* With writeback cache, kernel may send read requests even -+ when userspace opened write-only */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* With writeback cache, O_APPEND is handled by the kernel. -+ This breaks atomicity (since the file may change in the -+ underlying filesystem, so that the kernel's idea of the -+ end of the file isn't accurate anymore). In this example, -+ we just accept that. A more rigorous filesystem may want -+ to return an error here */ -+ if (lo->writeback && (fi->flags & O_APPEND)) -+ fi->flags &= ~O_APPEND; -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+} -+ -+static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ (void) ino; -+ -+ close(fi->fh); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fi->fh); -+ else -+ res = fsync(fi->fh); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", ino, size, (unsigned long) offset); -+ -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; -+ -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+} -+ -+static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) -+{ -+ (void) ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -+ ino, out_buf.buf[0].size, (unsigned long) off); -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if(res < 0) -+ fuse_reply_err(req, -res); -+ else -+ fuse_reply_write(req, (size_t) res); -+} -+ -+static void lo_statfs(fuse_req_t req, fuse_ino_t ino) -+{ -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) -+ fuse_reply_err(req, errno); -+ else -+ fuse_reply_statfs(req, &stbuf); -+} -+ -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi) -+{ -+ int err = EOPNOTSUPP; -+ (void) ino; -+ -+#ifdef HAVE_FALLOCATE -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) -+ err = errno; -+ -+#elif defined(HAVE_POSIX_FALLOCATE) -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ err = posix_fallocate(fi->fh, offset, length); -+#endif -+ -+ fuse_reply_err(req, err); -+} -+ -+static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op) 
-+{ -+ int res; -+ (void) ino; -+ -+ res = flock(fi->fh, op); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? 
errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+#ifdef HAVE_COPY_FILE_RANGE -+static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags) -+{ -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -+ len, flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -+ flags); -+ if (res < 0) -+ fuse_reply_err(req, -errno); -+ else -+ fuse_reply_write(req, res); -+} -+#endif -+ -+static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi) -+{ -+ off_t res; -+ -+ (void)ino; -+ res = lseek(fi->fh, off, whence); -+ if (res != -1) -+ fuse_reply_lseek(req, res); -+ else -+ fuse_reply_err(req, errno); -+} -+ -+static struct fuse_lowlevel_ops lo_oper = { -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, -+#ifdef HAVE_COPY_FILE_RANGE -+ .copy_file_range = 
lo_copy_file_range, -+#endif -+ .lseek = lo_lseek, -+}; -+ -+int main(int argc, char *argv[]) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, -+ .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ printf("FUSE library version %s\n", fuse_pkgversion()); -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if(opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -+ return 1; -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -+ lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", -+ lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) -+ goto err_out1; -+ -+ if (fuse_set_signal_handlers(se) != 0) -+ goto err_out2; -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) -+ goto err_out3; -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) -+ ret = fuse_session_loop(se); -+ else -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ -+ fuse_session_unmount(se); -+err_out3: -+ fuse_remove_signal_handlers(se); -+err_out2: -+ fuse_session_destroy(se); -+err_out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ -+ if (lo.root.fd >= 0) -+ close(lo.root.fd); -+ -+ return ret ? 1 : 0; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch deleted file mode 100644 index cef537a..0000000 --- a/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:39 +0100 -Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with - FUSE_LOG_DEBUG level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-65-dgilbert@redhat.com> -Patchwork-id: 93517 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can see the timestamp. 
- -Add nano second timestamp, which got by get_clock(), to the log with -FUSE_LOG_DEBUG level if the syslog option isn't set. - -The log is like as: - - # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... - [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 - [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event - [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 - -Signed-off-by: Masayoshi Mizuma -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index f08324f..98114a3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/timer.h" - #include "fuse_virtio.h" - #include "fuse_log.h" - #include "fuse_lowlevel.h" -@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - - if (current_log_level == FUSE_LOG_DEBUG) { -- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ if (!use_syslog) { -+ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", -+ get_clock(), syscall(__NR_gettid), fmt); -+ } else { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), -+ fmt); -+ } - fmt = localfmt; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch deleted file mode 100644 index 4713a0d..0000000 --- a/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:53 +0100 -Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-79-dgilbert@redhat.com> -Patchwork-id: 93532 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Clear out our inodes and fd's on a 'destroy' - so we get rid -of them if we reboot the guest. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b176a31..9ed77a1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - -+static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) -+{ -+ struct lo_inode *inode = value; -+ struct lo_data *lo = user_data; -+ -+ inode->refcount = 0; -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ close(inode->fd); -+ -+ return TRUE; -+} -+ -+static void unref_all_inodes(struct lo_data *lo) -+{ -+ pthread_mutex_lock(&lo->mutex); -+ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, - } - } - -+static void lo_destroy(void *userdata) -+{ -+ struct lo_data *lo = (struct lo_data *)userdata; -+ unref_all_inodes(lo); -+} -+ - static struct fuse_lowlevel_ops lo_oper = { - .init = lo_init, - .lookup = lo_lookup, -@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { - .copy_file_range = lo_copy_file_range, - #endif - .lseek = lo_lseek, -+ .destroy = lo_destroy, - }; - - /* Print vhost-user.json backend program capabilities */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch deleted file mode 100644 index c421365..0000000 --- a/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:23 +0100 -Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex - lock itself -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-109-dgilbert@redhat.com> -Patchwork-id: 93563 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -lo_destroy was relying on some implicit knowledge of the locking; -we can avoid this if we create an unref_inode that doesn't take -the lock and then grab it for the whole of the lo_destroy. - -Suggested-by: Vivek Goyal -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- - 1 file changed, 17 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index eb001b9..fc15d61 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - lo_inode_put(lo, &inode); - } - --static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -- uint64_t n) -+/* To be called with lo->mutex held */ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { - if (!inode) { - return; - } - -- pthread_mutex_lock(&lo->mutex); - assert(inode->nlookup >= n); - inode->nlookup -= n; - if (!inode->nlookup) { -@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); -- pthread_mutex_unlock(&lo->mutex); - - /* Drop our refcount from lo_do_lookup() */ - lo_inode_put(lo, &inode); -- } else { -- pthread_mutex_unlock(&lo->mutex); - } - } - -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) -+{ -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ unref_inode(lo, inode, n); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; - -- /* -- * Normally lo->mutex must be taken when traversing lo->inodes but -- * lo_destroy() is a serialized request so no races are possible here. 
-- * -- * In addition, we cannot acquire lo->mutex since unref_inode() takes it -- * too and this would result in a recursive lock. -- */ -+ pthread_mutex_lock(&lo->mutex); - while (true) { - GHashTableIter iter; - gpointer key, value; -@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) - } - - struct lo_inode *inode = value; -- unref_inode_lolocked(lo, inode, inode->nlookup); -+ unref_inode(lo, inode, inode->nlookup); - } -+ pthread_mutex_unlock(&lo->mutex); - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch deleted file mode 100644 index 9f198c2..0000000 --- a/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +++ /dev/null @@ -1,176 +0,0 @@ -From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:33 +0100 -Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-59-dgilbert@redhat.com> -Patchwork-id: 93513 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If client requested killing setuid/setgid bits on file being written, drop -CAP_FSETID capability so that setuid/setgid bits are cleared upon write -automatically. - -pjdfstest chown/12.t needs this. - -Signed-off-by: Vivek Goyal - dgilbert: reworked for libcap-ng -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 105 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 97e7c75..d53cb1e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -201,6 +201,91 @@ static int load_capng(void) - return 0; - } - -+/* -+ * Helpers for dropping and regaining effective capabilities. Returns 0 -+ * on success, error otherwise -+ */ -+static int drop_effective_cap(const char *cap_name, bool *cap_dropped) -+{ -+ int cap, ret; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ /* We dont have this capability in effective set already. 
*/ -+ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { -+ ret = 0; -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); -+ goto out; -+ } -+ -+ ret = 0; -+ if (cap_dropped) { -+ *cap_dropped = true; -+ } -+ -+out: -+ return ret; -+} -+ -+static int gain_effective_cap(const char *cap_name) -+{ -+ int cap; -+ int ret = 0; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); -+ goto out; -+ } -+ ret = 0; -+ -+out: -+ return ret; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - (void)ino; - ssize_t res; - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ bool cap_fsetid_dropped = false; - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - out_buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -+ /* -+ * If kill_priv is set, drop CAP_FSETID which should lead to kernel -+ * clearing setuid/setgid on file. 
-+ */ -+ if (fi->kill_priv) { -+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); -+ if (res != 0) { -+ fuse_reply_err(req, res); -+ return; -+ } -+ } -+ - res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { - fuse_reply_write(req, (size_t)res); - } -+ -+ if (cap_fsetid_dropped) { -+ res = gain_effective_cap("FSETID"); -+ if (res) { -+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); -+ } -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/kvm-virtiofsd-Fast-path-for-virtio-read.patch deleted file mode 100644 index 03874ce..0000000 --- a/kvm-virtiofsd-Fast-path-for-virtio-read.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:06 +0100 -Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-32-dgilbert@redhat.com> -Patchwork-id: 93480 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Readv the data straight into the guests buffer. - -Signed-off-by: Dr. David Alan Gilbert -With fix by: -Signed-off-by: Eryu Guan -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 5 ++ - tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 4 + - 3 files changed, 171 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 380d93b..4f4684d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -+ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && -+ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { -+ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); -+ } -+ - abort(); /* Will have taken vhost path */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f1adeb6..7e2711b 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -230,6 +230,168 @@ err: - return ret; - } - -+/* -+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer -+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK -+ * We need send the iov and then the buffer. -+ * Return 0 on success -+ */ -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, struct fuse_bufvec *buf, -+ size_t len) -+{ -+ int ret = 0; -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! 
*/ -+ -+ size_t iov_len = iov_size(iov, count); -+ size_t tosend_len = iov_len + len; -+ -+ out->len = tosend_len; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, -+ count, len, iov_len); -+ -+ /* unique == 0 is notification which we don't support */ -+ assert(out->unique); -+ -+ /* For virtio we always have ch */ -+ assert(ch); -+ assert(!ch->qi->reply_sent); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ ret = E2BIG; -+ goto err; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ ret = E2BIG; -+ goto err; -+ } -+ -+ /* TODO: Limit to 'len' */ -+ -+ /* First copy the header data from iov->in_sg */ -+ copy_iov(iov, count, in_sg, in_num, iov_len); -+ -+ /* -+ * Build a copy of the the in_sg iov so we can skip bits in it, -+ * including changing the offsets -+ */ -+ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); -+ assert(in_sg_cpy); -+ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); -+ /* These get updated as we skip */ -+ struct iovec *in_sg_ptr = in_sg_cpy; -+ int in_sg_cpy_count = in_num; -+ -+ /* skip over parts of in_sg that contained the header iov */ -+ size_t skip_size = iov_len; -+ -+ size_t in_sg_left = 0; -+ do { -+ while (skip_size != 0 && in_sg_cpy_count) { -+ if (skip_size >= in_sg_ptr[0].iov_len) { -+ skip_size -= 
in_sg_ptr[0].iov_len; -+ in_sg_ptr++; -+ in_sg_cpy_count--; -+ } else { -+ in_sg_ptr[0].iov_len -= skip_size; -+ in_sg_ptr[0].iov_base += skip_size; -+ break; -+ } -+ } -+ -+ int i; -+ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { -+ in_sg_left += in_sg_ptr[i].iov_len; -+ } -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " -+ "in_sg_left=%zd\n", -+ __func__, skip_size, in_sg_cpy_count, in_sg_left); -+ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, -+ buf->buf[0].pos); -+ -+ if (ret == -1) { -+ ret = errno; -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", -+ __func__, len); -+ free(in_sg_cpy); -+ goto err; -+ } -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, -+ ret, len); -+ if (ret < len && ret) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); -+ /* Skip over this much next time around */ -+ skip_size = ret; -+ buf->buf[0].pos += ret; -+ len -= ret; -+ -+ /* Lets do another read */ -+ continue; -+ } -+ if (!ret) { -+ /* EOF case? 
*/ -+ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, -+ in_sg_left); -+ break; -+ } -+ if (ret != len) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); -+ ret = EIO; -+ free(in_sg_cpy); -+ goto err; -+ } -+ in_sg_left -= ret; -+ len -= ret; -+ } while (in_sg_left); -+ free(in_sg_cpy); -+ -+ /* Need to fix out->len on EOF */ -+ if (len) { -+ struct fuse_out_header *out_sg = in_sg[0].iov_base; -+ -+ tosend_len -= len; -+ out_sg->len = tosend_len; -+ } -+ -+ ret = 0; -+ -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+err: -+ if (ret == 0) { -+ ch->qi->reply_sent = true; -+ } -+ -+ return ret; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 135a148..cc676b9 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count); - -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, -+ struct fuse_bufvec *buf, size_t len); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch deleted file mode 100644 index 12bb9a2..0000000 --- a/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:50 +0100 -Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU - builds -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-16-dgilbert@redhat.com> -Patchwork-id: 93470 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -All of the fuse files include config.h and define GNU_SOURCE -where we don't have either under our build - remove them. -Fixup path to the kernel's fuse.h in the QEMUs world. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 4 +--- - tools/virtiofsd/fuse_i.h | 3 +++ - tools/virtiofsd/fuse_log.c | 1 + - tools/virtiofsd/fuse_lowlevel.c | 6 ++---- - tools/virtiofsd/fuse_opt.c | 2 +- - tools/virtiofsd/fuse_signals.c | 2 +- - tools/virtiofsd/helper.c | 1 + - tools/virtiofsd/passthrough_ll.c | 8 ++------ - 8 files changed, 12 insertions(+), 15 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 4d507f3..772efa9 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -9,9 +9,7 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index e63cb58..bae0699 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,6 +6,9 @@ - * See the file COPYING.LIB - */ - -+#define FUSE_USE_VERSION 31 -+ -+ - #include "fuse.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 11345f9..c301ff6 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -8,6 +8,7 @@ - 
* See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_log.h" - - #include -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 3da80de..07fb8a6 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -9,11 +9,9 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" --#include "fuse_kernel.h" -+#include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index edd36f4..2892236 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -9,8 +9,8 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_opt.h" --#include "config.h" - #include "fuse_i.h" - #include "fuse_misc.h" - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 19d6791..dc7c8ac 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -8,7 +8,7 @@ - * See the file COPYING.LIB - */ - --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index d9227d7..9333691 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,6 +10,7 @@ - * See the file COPYING.LIB. 
- */ - -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 126a56c..322a889 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -35,15 +35,11 @@ - * \include passthrough_ll.c - */ - --#define _GNU_SOURCE --#define FUSE_USE_VERSION 31 -- --#include "config.h" -- -+#include "qemu/osdep.h" -+#include "fuse_lowlevel.h" - #include - #include - #include --#include - #include - #include - #include --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch deleted file mode 100644 index f929bab..0000000 --- a/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:15 +0100 -Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in - writeback mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-101-dgilbert@redhat.com> -Patchwork-id: 93556 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -When writeback mode is enabled (-o writeback), O_APPEND handling is -done in kernel. Therefore virtiofsd clears O_APPEND flag when open. -Otherwise O_APPEND flag takes precedence over pwrite() and write -data may corrupt. - -Currently clearing O_APPEND flag is done in lo_open(), but we also -need the same operation in lo_create(). 
So, factor out the flag -update operation in lo_open() to update_open_flags() and call it -in both lo_open() and lo_create(). - -This fixes the failure of xfstest generic/069 in writeback mode -(which tests O_APPEND write data integrity). - -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- - 1 file changed, 33 insertions(+), 33 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 948cb19..4c61ac5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - fuse_reply_err(req, 0); - } - -+static void update_open_flags(int writeback, struct fuse_file_info *fi) -+{ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. 
-+ */ -+ fi->flags &= ~O_DIRECT; -+} -+ - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. -- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, - fi->flags); - -- /* -- * With writeback cache, kernel may send read requests even -- * when userspace opened write-only -- */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* -- * With writeback cache, O_APPEND is handled by the kernel. -- * This breaks atomicity (since the file may change in the -- * underlying filesystem, so that the kernel's idea of the -- * end of the file isn't accurate anymore). In this example, -- * we just accept that. A more rigorous filesystem may want -- * to return an error here -- */ -- if (lo->writeback && (fi->flags & O_APPEND)) { -- fi->flags &= ~O_APPEND; -- } -- -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. 
-- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch deleted file mode 100644 index 306c183..0000000 --- a/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:49 +0100 -Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-15-dgilbert@redhat.com> -Patchwork-id: 93469 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -QEMU's compiler enables warnings/errors for ignored values -and the (void) trick used in the fuse code isn't enough. -Turn all the return values into a return value on the function. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- - 1 file changed, 22 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5e6f205..d9227d7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,12 +10,10 @@ - * See the file COPYING.LIB. 
- */ - --#include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "mount_util.h" - - #include - #include -@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - - int fuse_daemonize(int foreground) - { -+ int ret = 0, rett; - if (!foreground) { - int nullfd; - int waiter[2]; -@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) - case 0: - break; - default: -- (void)read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -+ _exit(read(waiter[0], &completed, -+ sizeof(completed) != sizeof(completed))); - } - - if (setsid() == -1) { -@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) - return -1; - } - -- (void)chdir("/"); -+ ret = chdir("/"); - - nullfd = open("/dev/null", O_RDWR, 0); - if (nullfd != -1) { -- (void)dup2(nullfd, 0); -- (void)dup2(nullfd, 1); -- (void)dup2(nullfd, 2); -+ rett = dup2(nullfd, 0); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 1); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 2); -+ if (!ret) { -+ ret = rett; -+ } - if (nullfd > 2) { - close(nullfd); - } -@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) - - /* Propagate completion of daemon initialization */ - completed = 1; -- (void)write(waiter[1], &completed, sizeof(completed)); -+ rett = write(waiter[1], &completed, sizeof(completed)); -+ if (!ret) { -+ ret = rett; -+ } - close(waiter[0]); - close(waiter[1]); - } else { -- (void)chdir("/"); -+ ret = chdir("/"); - } -- return 0; -+ return ret; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-xattr-operations.patch b/kvm-virtiofsd-Fix-xattr-operations.patch deleted file mode 100644 index 532948f..0000000 --- a/kvm-virtiofsd-Fix-xattr-operations.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:14 +0000 -Subject: [PATCH 18/18] virtiofsd: Fix xattr operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-8-dgilbert@redhat.com> -Patchwork-id: 94123 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -Current virtiofsd has problems about xattr operations and -they does not work properly for directory/symlink/special file. - -The fundamental cause is that virtiofsd uses openat() + f...xattr() -systemcalls for xattr operation but we should not open symlink/special -file in the daemon. Therefore the function is restricted. - -Fix this problem by: - 1. during setup of each thread, call unshare(CLONE_FS) - 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular - file or directory, use fchdir(proc_loot_fd) + ...xattr() + - fchdir(root.fd) instead of openat() + f...xattr() - - (Note: for a regular file/directory openat() + f...xattr() - is still used for performance reason) - -With this patch, xfstests generic/062 passes on virtiofs. - -This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. -The original discussion can be found here: - https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++ - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ - tools/virtiofsd/seccomp.c | 6 +++ - 3 files changed, 77 insertions(+), 47 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index dd1c605..3b6d16a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -426,6 +426,8 @@ err: - return ret; - } - -+static __thread bool clone_fs_called; -+ - /* Process one FVRequest in a thread pool */ - static void fv_queue_worker(gpointer data, gpointer user_data) - { -@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) - - assert(se->bufsize > sizeof(struct fuse_in_header)); - -+ if (!clone_fs_called) { -+ int ret; -+ -+ /* unshare FS for xattr operation */ -+ ret = unshare(CLONE_FS); -+ /* should not fail */ -+ assert(ret == 0); -+ -+ clone_fs_called = true; -+ } -+ - /* - * An element contains one request and the space to send our response - * They're spread over multiple descriptors in a scatter/gather set -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 50c7273..9cba3f1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -123,7 +123,7 @@ struct lo_inode { - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - -- bool is_symlink; -+ mode_t filetype; - }; - - struct lo_cred { -@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. 
*/ -@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ /* cache only filetype */ -+ inode->filetype = (e->attr.st_mode & S_IFMT); - - /* - * One for the caller and one for nlookup (released in -@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. */ -@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", - ino, name, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ /* -+ * It is not safe to open() non-regular/non-dir files in file server -+ * unless O_PATH is used, so use that method for regular files/dir -+ * only (as it seems giving less performance overhead). -+ * Otherwise, call fchdir() to avoid open(). 
-+ */ -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = fgetxattr(fd, name, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = getxattr(procname, name, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, - size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = flistxattr(fd, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = listxattr(procname, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 - ", name=%s value=%s size=%zd)\n", ino, name, value, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fsetxattr(fd, name, value, size, flags); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = setxattr(procname, name, value, size, flags); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? errno : 0; - - out: -@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, - name); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fremovexattr(fd, name); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = removexattr(procname, name); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fremovexattr(fd, name); - saverr = ret == -1 ? 
errno : 0; - - out: -@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - exit(1); - } - -- root->is_symlink = false; -+ root->filetype = S_IFDIR; - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 2d9d4a7..bd9e7b0 100644 ---- a/tools/virtiofsd/seccomp.c -+++ b/tools/virtiofsd/seccomp.c -@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(exit), - SCMP_SYS(exit_group), - SCMP_SYS(fallocate), -+ SCMP_SYS(fchdir), - SCMP_SYS(fchmodat), - SCMP_SYS(fchownat), - SCMP_SYS(fcntl), -@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { - SCMP_SYS(getpid), - SCMP_SYS(gettid), - SCMP_SYS(gettimeofday), -+ SCMP_SYS(getxattr), - SCMP_SYS(linkat), -+ SCMP_SYS(listxattr), - SCMP_SYS(lseek), - SCMP_SYS(madvise), - SCMP_SYS(mkdirat), -@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(recvmsg), - SCMP_SYS(renameat), - SCMP_SYS(renameat2), -+ SCMP_SYS(removexattr), - SCMP_SYS(rt_sigaction), - SCMP_SYS(rt_sigprocmask), - SCMP_SYS(rt_sigreturn), -@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { - SCMP_SYS(setresuid32), - #endif - SCMP_SYS(set_robust_list), -+ SCMP_SYS(setxattr), - SCMP_SYS(symlinkat), - SCMP_SYS(time), /* Rarely needed, except on static builds */ - SCMP_SYS(tgkill), - SCMP_SYS(unlinkat), -+ SCMP_SYS(unshare), - SCMP_SYS(utimensat), - SCMP_SYS(write), - SCMP_SYS(writev), --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch deleted file mode 100644 index 5593a33..0000000 --- a/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +++ /dev/null @@ -1,14743 +0,0 @@ -From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:45 +0100 -Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-11-dgilbert@redhat.com> -Patchwork-id: 93464 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Mostly using a set like: - -indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file -clang-format -style=file -i -- file -clang-tidy -fix-errors -checks=readability-braces-around-statements file -clang-format -style=file -i -- file - -With manual cleanups. - -The .clang-format used is below. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed by: Aleksandar Markovic - -Language: Cpp -AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false # although we like it, it creates churn -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: true -AlignTrailingComments: false # churn -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account -AlwaysBreakBeforeMultilineStrings: false -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterStruct: false - AfterUnion: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Custom -BreakBeforeTernaryOperators: false -BreakStringLiterals: true 
-ColumnLimit: 80 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ - 'CPU_FOREACH', - 'CPU_FOREACH_REVERSE', - 'CPU_FOREACH_SAFE', - 'IOMMU_NOTIFIER_FOREACH', - 'QLIST_FOREACH', - 'QLIST_FOREACH_ENTRY', - 'QLIST_FOREACH_RCU', - 'QLIST_FOREACH_SAFE', - 'QLIST_FOREACH_SAFE_RCU', - 'QSIMPLEQ_FOREACH', - 'QSIMPLEQ_FOREACH_SAFE', - 'QSLIST_FOREACH', - 'QSLIST_FOREACH_SAFE', - 'QTAILQ_FOREACH', - 'QTAILQ_FOREACH_REVERSE', - 'QTAILQ_FOREACH_SAFE', - 'QTAILQ_RAW_FOREACH', - 'RAMBLOCK_FOREACH' -] -IncludeCategories: - - Regex: '^"qemu/osdep.h' - Priority: -3 - - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' - Priority: -2 - - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' - Priority: -1 - - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '$' -IndentCaseLabels: false -IndentWidth: 4 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' -MaxEmptyLinesToKeep: 2 -PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInContainerLiterals: true -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Auto -UseTab: Never -... - -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 434 ++-- - tools/virtiofsd/fuse.h | 1572 +++++++------- - tools/virtiofsd/fuse_common.h | 730 +++---- - tools/virtiofsd/fuse_i.h | 121 +- - tools/virtiofsd/fuse_log.c | 38 +- - tools/virtiofsd/fuse_log.h | 32 +- - tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- - tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- - tools/virtiofsd/fuse_misc.h | 30 +- - tools/virtiofsd/fuse_opt.c | 659 +++--- - tools/virtiofsd/fuse_opt.h | 79 +- - tools/virtiofsd/fuse_signals.c | 118 +- - tools/virtiofsd/helper.c | 506 ++--- - tools/virtiofsd/passthrough_helpers.h | 33 +- - tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- - 15 files changed, 6382 insertions(+), 6061 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index aefb7db..5df946c 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -1,252 +1,272 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2010 Miklos Szeredi -- -- Functions for dealing with `struct fuse_buf` and `struct -- fuse_bufvec`. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2010 Miklos Szeredi -+ * -+ * Functions for dealing with `struct fuse_buf` and `struct -+ * fuse_bufvec`. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" -+#include -+#include - #include - #include --#include --#include - - size_t fuse_buf_size(const struct fuse_bufvec *bufv) - { -- size_t i; -- size_t size = 0; -- -- for (i = 0; i < bufv->count; i++) { -- if (bufv->buf[i].size == SIZE_MAX) -- size = SIZE_MAX; -- else -- size += bufv->buf[i].size; -- } -- -- return size; -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) { -+ size = SIZE_MAX; -+ } else { -+ size += bufv->buf[i].size; -+ } -+ } -+ -+ return size; - } - - static size_t min_size(size_t s1, size_t s2) - { -- return s1 < s2 ? s1 : s2; -+ return s1 < s2 ? s1 : s2; - } - - static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- res = pwrite(dst->fd, (char *)src->mem + src_off, len, -- dst->pos + dst_off); -- } else { -- res = write(dst->fd, (char *)src->mem + src_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(dst->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- src_off += res; -- dst_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ src_off += res; -+ dst_off += res; -+ len -= 
res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (src->flags & FUSE_BUF_FD_SEEK) { -- res = pread(src->fd, (char *)dst->mem + dst_off, len, -- src->pos + src_off); -- } else { -- res = read(src->fd, (char *)dst->mem + dst_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- char buf[4096]; -- struct fuse_buf tmp = { -- .size = sizeof(buf), -- .flags = 0, -- }; -- ssize_t res; -- size_t copied = 0; -- -- tmp.mem = buf; -- -- while (len) { -- size_t this_len = min_size(tmp.size, len); -- size_t read_len; -- -- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- if (res == 0) -- break; -- -- read_len = res; -- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -- if (res < 0) { -- if (!copied) -- 
return res; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- -- if (res < this_len) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ -+ if (res < this_len) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) - { -- int src_is_fd = src->flags & FUSE_BUF_IS_FD; -- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -- -- if (!src_is_fd && !dst_is_fd) { -- char *dstmem = (char *)dst->mem + dst_off; -- char *srcmem = (char *)src->mem + src_off; -- -- if (dstmem != srcmem) { -- if (dstmem + len <= srcmem || srcmem + len <= dstmem) -- memcpy(dstmem, srcmem, len); -- else -- memmove(dstmem, srcmem, len); -- } -- -- return len; -- } else if (!src_is_fd) { -- return fuse_buf_write(dst, dst_off, src, src_off, len); -- } else if (!dst_is_fd) { -- return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -- } -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ 
char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { -+ memcpy(dstmem, srcmem, len); -+ } else { -+ memmove(dstmem, srcmem, len); -+ } -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } - } - - static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) - { -- if (bufv->idx < bufv->count) -- return &bufv->buf[bufv->idx]; -- else -- return NULL; -+ if (bufv->idx < bufv->count) { -+ return &bufv->buf[bufv->idx]; -+ } else { -+ return NULL; -+ } - } - - static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - { -- const struct fuse_buf *buf = fuse_bufvec_current(bufv); -- -- bufv->off += len; -- assert(bufv->off <= buf->size); -- if (bufv->off == buf->size) { -- assert(bufv->idx < bufv->count); -- bufv->idx++; -- if (bufv->idx == bufv->count) -- return 0; -- bufv->off = 0; -- } -- return 1; -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) { -+ return 0; -+ } -+ bufv->off = 0; -+ } -+ return 1; - } - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- size_t copied = 0; -- -- if (dstv == srcv) -- return fuse_buf_size(dstv); -- -- for (;;) { -- const struct fuse_buf *src = fuse_bufvec_current(srcv); -- const struct fuse_buf *dst = fuse_bufvec_current(dstv); -- size_t src_len; -- size_t dst_len; -- size_t len; -- ssize_t res; -- -- if (src == NULL || dst == NULL) -- break; -- -- src_len = src->size - srcv->off; -- 
dst_len = dst->size - dstv->off; -- len = min_size(src_len, dst_len); -- -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- copied += res; -- -- if (!fuse_bufvec_advance(srcv, res) || -- !fuse_bufvec_advance(dstv, res)) -- break; -- -- if (res < len) -- break; -- } -- -- return copied; -+ size_t copied = 0; -+ -+ if (dstv == srcv) { -+ return fuse_buf_size(dstv); -+ } -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) { -+ break; -+ } -+ -+ src_len = src->size - srcv->off; -+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) || -+ !fuse_bufvec_advance(dstv, res)) { -+ break; -+ } -+ -+ if (res < len) { -+ break; -+ } -+ } -+ -+ return copied; - } -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 3202fba..7a4c713 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1,15 +1,15 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - #ifndef FUSE_H_ - #define FUSE_H_ - --/** @file -+/* - * - * This file defines the library interface of FUSE - * -@@ -19,15 +19,15 @@ - #include "fuse_common.h" - - #include --#include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Basic FUSE API * -- * ----------------------------------------------------------- */ -+/* -+ * Basic FUSE API -+ */ - - /** Handle for a FUSE filesystem */ - struct fuse; -@@ -36,38 +36,39 @@ struct fuse; - * Readdir flags, passed to ->readdir() - */ - enum fuse_readdir_flags { -- /** -- * "Plus" mode. -- * -- * The kernel wants to prefill the inode cache during readdir. The -- * filesystem may honour this by filling in the attributes and setting -- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -- * just ignore this flag completely. -- */ -- FUSE_READDIR_PLUS = (1 << 0), -+ /** -+ * "Plus" mode. -+ * -+ * The kernel wants to prefill the inode cache during readdir. The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), - }; - - enum fuse_fill_dir_flags { -- /** -- * "Plus" mode: all file attributes are valid -- * -- * The attributes are used by the kernel to prefill the inode cache -- * during a readdir. -- * -- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -- * and vice versa. -- */ -- FUSE_FILL_DIR_PLUS = (1 << 1), -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. 
-+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), - }; - --/** Function to add an entry in a readdir() operation -+/** -+ * Function to add an entry in a readdir() operation - * - * The *off* parameter can be any non-zero value that enables the - * filesystem to identify the current point in the directory - * stream. It does not need to be the actual physical position. A - * value of zero is reserved to indicate that seeking in directories - * is not supported. -- * -+ * - * @param buf the buffer passed to the readdir() operation - * @param name the file name of the directory entry - * @param stat file attributes, can be NULL -@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { - * @param flags fill flags - * @return 1 if buffer is full, zero otherwise - */ --typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -- const struct stat *stbuf, off_t off, -- enum fuse_fill_dir_flags flags); -+typedef int (*fuse_fill_dir_t)(void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); - /** - * Configuration of the high-level API - * -@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, - * file system implementation. - */ - struct fuse_config { -- /** -- * If `set_gid` is non-zero, the st_gid attribute of each file -- * is overwritten with the value of `gid`. -- */ -- int set_gid; -- unsigned int gid; -- -- /** -- * If `set_uid` is non-zero, the st_uid attribute of each file -- * is overwritten with the value of `uid`. -- */ -- int set_uid; -- unsigned int uid; -- -- /** -- * If `set_mode` is non-zero, the any permissions bits set in -- * `umask` are unset in the st_mode attribute of each file. -- */ -- int set_mode; -- unsigned int umask; -- -- /** -- * The timeout in seconds for which name lookups will be -- * cached. -- */ -- double entry_timeout; -- -- /** -- * The timeout in seconds for which a negative lookup will be -- * cached. 
This means, that if file did not exist (lookup -- * retuned ENOENT), the lookup will only be redone after the -- * timeout, and the file/directory will be assumed to not -- * exist until then. A value of zero means that negative -- * lookups are not cached. -- */ -- double negative_timeout; -- -- /** -- * The timeout in seconds for which file/directory attributes -- * (as returned by e.g. the `getattr` handler) are cached. -- */ -- double attr_timeout; -- -- /** -- * Allow requests to be interrupted -- */ -- int intr; -- -- /** -- * Specify which signal number to send to the filesystem when -- * a request is interrupted. The default is hardcoded to -- * USR1. -- */ -- int intr_signal; -- -- /** -- * Normally, FUSE assigns inodes to paths only for as long as -- * the kernel is aware of them. With this option inodes are -- * instead remembered for at least this many seconds. This -- * will require more memory, but may be necessary when using -- * applications that make use of inode numbers. -- * -- * A number of -1 means that inodes will be remembered for the -- * entire life-time of the file-system process. -- */ -- int remember; -- -- /** -- * The default behavior is that if an open file is deleted, -- * the file is renamed to a hidden file (.fuse_hiddenXXX), and -- * only removed when the file is finally released. This -- * relieves the filesystem implementation of having to deal -- * with this problem. This option disables the hiding -- * behavior, and files are removed immediately in an unlink -- * operation (or in a rename operation which overwrites an -- * existing file). -- * -- * It is recommended that you not use the hard_remove -- * option. 
When hard_remove is set, the following libc -- * functions fail on unlinked files (returning errno of -- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -- * ftruncate(2), fstat(2), fchmod(2), fchown(2) -- */ -- int hard_remove; -- -- /** -- * Honor the st_ino field in the functions getattr() and -- * fill_dir(). This value is used to fill in the st_ino field -- * in the stat(2), lstat(2), fstat(2) functions and the d_ino -- * field in the readdir(2) function. The filesystem does not -- * have to guarantee uniqueness, however some applications -- * rely on this value being unique for the whole filesystem. -- * -- * Note that this does *not* affect the inode that libfuse -- * and the kernel use internally (also called the "nodeid"). -- */ -- int use_ino; -- -- /** -- * If use_ino option is not given, still try to fill in the -- * d_ino field in readdir(2). If the name was previously -- * looked up, and is still in the cache, the inode number -- * found there will be used. Otherwise it will be set to -1. -- * If use_ino option is given, this option is ignored. -- */ -- int readdir_ino; -- -- /** -- * This option disables the use of page cache (file content cache) -- * in the kernel for this filesystem. This has several affects: -- * -- * 1. Each read(2) or write(2) system call will initiate one -- * or more read or write operations, data will not be -- * cached in the kernel. -- * -- * 2. The return value of the read() and write() system calls -- * will correspond to the return values of the read and -- * write operations. This is useful for example if the -- * file size is not known in advance (before reading it). -- * -- * Internally, enabling this option causes fuse to set the -- * `direct_io` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. -- */ -- int direct_io; -- -- /** -- * This option disables flushing the cache of the file -- * contents on every open(2). 
This should only be enabled on -- * filesystems where the file data is never changed -- * externally (not through the mounted FUSE filesystem). Thus -- * it is not suitable for network filesystems and other -- * intermediate filesystems. -- * -- * NOTE: if this option is not specified (and neither -- * direct_io) data is still cached after the open(2), so a -- * read(2) system call will not always initiate a read -- * operation. -- * -- * Internally, enabling this option causes fuse to set the -- * `keep_cache` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. -- */ -- int kernel_cache; -- -- /** -- * This option is an alternative to `kernel_cache`. Instead of -- * unconditionally keeping cached data, the cached data is -- * invalidated on open(2) if if the modification time or the -- * size of the file has changed since it was last opened. -- */ -- int auto_cache; -- -- /** -- * The timeout in seconds for which file attributes are cached -- * for the purpose of checking if auto_cache should flush the -- * file data on open. -- */ -- int ac_attr_timeout_set; -- double ac_attr_timeout; -- -- /** -- * If this option is given the file-system handlers for the -- * following operations will not receive path information: -- * read, write, flush, release, fsync, readdir, releasedir, -- * fsyncdir, lock, ioctl and poll. -- * -- * For the truncate, getattr, chmod, chown and utimens -- * operations the path will be provided only if the struct -- * fuse_file_info argument is NULL. -- */ -- int nullpath_ok; -- -- /** -- * The remaining options are used by libfuse internally and -- * should not be touched. -- */ -- int show_help; -- char *modules; -- int debug; -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. 
-+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. 
This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). 
-+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. 
-+ */ -+ int show_help; -+ char *modules; -+ int debug; - }; - - -@@ -293,515 +294,535 @@ struct fuse_config { - * Almost all operations take a path which can be of any length. - */ - struct fuse_operations { -- /** Get file attributes. -- * -- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -- * ignored. The 'st_ino' field is ignored except if the 'use_ino' -- * mount option is given. In that case it is passed to userspace, -- * but libfuse and the kernel will still assign a different -- * inode for internal use (called the "nodeid"). -- * -- * `fi` will always be NULL if the file is not currently open, but -- * may also be NULL if the file is open. -- */ -- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -- -- /** Read the target of a symbolic link -- * -- * The buffer should be filled with a null terminated string. The -- * buffer size argument includes the space for the terminating -- * null character. If the linkname is too long to fit in the -- * buffer, it should be truncated. The return value should be 0 -- * for success. -- */ -- int (*readlink) (const char *, char *, size_t); -- -- /** Create a file node -- * -- * This is called for creation of all non-directory, non-symlink -- * nodes. If the filesystem defines a create() method, then for -- * regular files that will be called instead. -- */ -- int (*mknod) (const char *, mode_t, dev_t); -- -- /** Create a directory -- * -- * Note that the mode argument may not have the type specification -- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -- * correct directory type bits use mode|S_IFDIR -- * */ -- int (*mkdir) (const char *, mode_t); -- -- /** Remove a file */ -- int (*unlink) (const char *); -- -- /** Remove a directory */ -- int (*rmdir) (const char *); -- -- /** Create a symbolic link */ -- int (*symlink) (const char *, const char *); -- -- /** Rename a file -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. 
If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. -- */ -- int (*rename) (const char *, const char *, unsigned int flags); -- -- /** Create a hard link to a file */ -- int (*link) (const char *, const char *); -- -- /** Change the permission bits of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- */ -- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -- -- /** Change the owner and group of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -- -- /** Change the size of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -- -- /** Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -- * should be used by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount option is -- * given, this check is already done by the kernel before calling -- * open() and may thus be omitted by the filesystem. 
-- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. -- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open will also succeed without being send -- * to the filesystem process. -- * -- */ -- int (*open) (const char *, struct fuse_file_info *); -- -- /** Read data from an open file -- * -- * Read should return exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the -- * 'direct_io' mount option is specified, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. 
-- */ -- int (*read) (const char *, char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Write data to an open file -- * -- * Write should return exactly the number of bytes requested -- * except on error. An exception to this is when the 'direct_io' -- * mount option is specified (see read operation). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write) (const char *, const char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Get file system statistics -- * -- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -- */ -- int (*statfs) (const char *, struct statvfs *); -- -- /** Possibly flush cached data -- * -- * BIG NOTE: This is not equivalent to fsync(). It's not a -- * request to sync dirty data. -- * -- * Flush is called on each close() of a file descriptor, as opposed to -- * release which is called on the close of the last file descriptor for -- * a file. Under Linux, errors returned by flush() will be passed to -- * userspace as errors from close(), so flush() is a good place to write -- * back any cached dirty data. However, many applications ignore errors -- * on close(), and on non-Linux systems, close() may succeed even if flush() -- * returns an error. For these reasons, filesystems should not assume -- * that errors returned by flush will ever be noticed or even -- * delivered. -- * -- * NOTE: The flush() method may be called more than once for each -- * open(). This happens if more than one file descriptor refers to an -- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -- * not possible to determine if a flush is final, so each flush should -- * be treated equally. Multiple write-flush sequences are relatively -- * rare, so this shouldn't be a problem. -- * -- * Filesystems shouldn't assume that flush will be called at any -- * particular point. It may be called more times than expected, or not -- * at all. 
-- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- int (*flush) (const char *, struct fuse_file_info *); -- -- /** Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open() call there will be exactly one release() call -- * with the same flags and file handle. It is possible to -- * have a file opened more than once, in which case only the last -- * release will mean, that no more reads/writes will happen on the -- * file. The return value of release is ignored. -- */ -- int (*release) (const char *, struct fuse_file_info *); -- -- /** Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. -- */ -- int (*fsync) (const char *, int, struct fuse_file_info *); -- -- /** Set extended attributes */ -- int (*setxattr) (const char *, const char *, const char *, size_t, int); -- -- /** Get extended attributes */ -- int (*getxattr) (const char *, const char *, char *, size_t); -- -- /** List extended attributes */ -- int (*listxattr) (const char *, char *, size_t); -- -- /** Remove extended attributes */ -- int (*removexattr) (const char *, const char *); -- -- /** Open directory -- * -- * Unless the 'default_permissions' mount option is given, -- * this method should check if opendir is permitted for this -- * directory. Optionally opendir may also return an arbitrary -- * filehandle in the fuse_file_info structure, which will be -- * passed to readdir, releasedir and fsyncdir. -- */ -- int (*opendir) (const char *, struct fuse_file_info *); -- -- /** Read directory -- * -- * The filesystem may choose between two modes of operation: -- * -- * 1) The readdir implementation ignores the offset parameter, and -- * passes zero to the filler function's offset. 
The filler -- * function will not return '1' (unless an error happens), so the -- * whole directory is read in a single readdir operation. -- * -- * 2) The readdir implementation keeps track of the offsets of the -- * directory entries. It uses the offset parameter and always -- * passes non-zero offset to the filler function. When the buffer -- * is full (or an error happens) the filler function will return -- * '1'. -- */ -- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -- struct fuse_file_info *, enum fuse_readdir_flags); -- -- /** Release directory -- */ -- int (*releasedir) (const char *, struct fuse_file_info *); -- -- /** Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data -- */ -- int (*fsyncdir) (const char *, int, struct fuse_file_info *); -- -- /** -- * Initialize filesystem -- * -- * The return value will passed in the `private_data` field of -- * `struct fuse_context` to all file operations, and as a -- * parameter to the destroy() method. It overrides the initial -- * value provided to fuse_main() / fuse_new(). -- */ -- void *(*init) (struct fuse_conn_info *conn, -- struct fuse_config *cfg); -- -- /** -- * Clean up filesystem -- * -- * Called on filesystem exit. -- */ -- void (*destroy) (void *private_data); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() system call. If the -- * 'default_permissions' mount option is given, this method is not -- * called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- */ -- int (*access) (const char *, int); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. 
-- */ -- int (*create) (const char *, mode_t, struct fuse_file_info *); -- -- /** -- * Perform POSIX file locking operation -- * -- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -- * -- * For the meaning of fields in 'struct flock' see the man page -- * for fcntl(2). The l_whence field will always be set to -- * SEEK_SET. -- * -- * For checking lock ownership, the 'fuse_file_info->owner' -- * argument must be used. -- * -- * For F_GETLK operation, the library will first check currently -- * held locks, and if a conflicting lock is found it will return -- * information without calling this method. This ensures, that -- * for local locks the l_pid field is correctly filled in. The -- * results may not be accurate in case of race conditions and in -- * the presence of hard links, but it's unlikely that an -- * application would rely on accurate GETLK results in these -- * cases. If a conflicting lock is not found, this method will be -- * called, and the filesystem may fill out l_pid by a meaningful -- * value, or it may leave this field zero. -- * -- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -- * of the process performing the locking operation. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*lock) (const char *, struct fuse_file_info *, int cmd, -- struct flock *); -- -- /** -- * Change the access and modification times of a file with -- * nanosecond resolution -- * -- * This supersedes the old utime() interface. New applications -- * should use this. -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * See the utimensat(2) man page for details. 
-- */ -- int (*utimens) (const char *, const struct timespec tv[2], -- struct fuse_file_info *fi); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- */ -- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -- -- /** -- * Ioctl -- * -- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -- * 64bit environment. The size and direction of data is -- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, -- * data will be NULL, for _IOC_WRITE data is out area, for -- * _IOC_READ in area and if both are set in/out area. In all -- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -- * -- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -- * directory file handle. -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- int (*ioctl) (const char *, unsigned int cmd, void *arg, -- struct fuse_file_info *, unsigned int flags, void *data); -- -- /** -- * Poll for IO readiness events -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. -- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- */ -- int (*poll) (const char *, struct fuse_file_info *, -- struct fuse_pollhandle *ph, unsigned *reventsp); -- -- /** Write contents of buffer to an open file -- * -- * Similar to the write() method, but data is supplied in a -- * generic buffer. Use fuse_buf_copy() to transfer data to -- * the destination. 
-- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *); -- -- /** Store data from an open file in a buffer -- * -- * Similar to the read() method, but data is stored and -- * returned in a generic buffer. -- * -- * No actual copying of data has to take place, the source -- * file descriptor may simply be stored in the buffer for -- * later data transfer. -- * -- * The buffer must be allocated dynamically and stored at the -- * location pointed to by bufp. If the buffer contains memory -- * regions, they too must be allocated using malloc(). The -- * allocated memory will be freed by the caller. -- */ -- int (*read_buf) (const char *, struct fuse_bufvec **bufp, -- size_t size, off_t off, struct fuse_file_info *); -- /** -- * Perform BSD file locking operation -- * -- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -- * -- * Nonblocking requests will be indicated by ORing LOCK_NB to -- * the above operations -- * -- * For more information see the flock(2) manual page. -- * -- * Additionally fi->owner will be set to a value unique to -- * this open file. This same value will be supplied to -- * ->release() when the file is released. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*flock) (const char *, struct fuse_file_info *, int op); -- -- /** -- * Allocates space for an open file -- * -- * This function ensures that required space is allocated for specified -- * file. If this function returns success then any subsequent write -- * request to specified range is guaranteed not to fail because of lack -- * of space on the file system media. 
-- */ -- int (*fallocate) (const char *, int, off_t, off_t, -- struct fuse_file_info *); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. -- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- */ -- ssize_t (*copy_file_range) (const char *path_in, -- struct fuse_file_info *fi_in, -- off_t offset_in, const char *path_out, -- struct fuse_file_info *fi_out, -- off_t offset_out, size_t size, int flags); -- -- /** -- * Find next data or hole after the specified offset -- */ -- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+ /** -+ * Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** -+ * Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. -+ */ -+ int (*readlink)(const char *, char *, size_t); -+ -+ /** -+ * Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. 
If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod)(const char *, mode_t, dev_t); -+ -+ /** -+ * Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ */ -+ int (*mkdir)(const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink)(const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir)(const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink)(const char *, const char *); -+ -+ /** -+ * Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename)(const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link)(const char *, const char *); -+ -+ /** -+ * Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. 
-+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. 
-+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write)(const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs)(const char *, struct statvfs *); -+ -+ /** -+ * Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. 
However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync)(const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr)(const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr)(const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr)(const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr)(const char *, const char *); -+ -+ /* -+ * Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** -+ * Release directory -+ */ -+ int (*releasedir)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir)(const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy)(void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access)(const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create)(const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens)(const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl)(const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll)(const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /* -+ * Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /* -+ * Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, -+ off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock)(const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range)(const char *path_in, -+ struct fuse_file_info *fi_in, off_t offset_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t offset_out, -+ size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek)(const char *, off_t off, int whence, -+ struct fuse_file_info *); - }; - --/** Extra context that may be needed by some filesystems -+/* -+ * Extra context that may be needed by some filesystems - * - * The uid, gid and pid fields are not filled in case of a writepage - * operation. - */ - struct fuse_context { -- /** Pointer to the fuse object */ -- struct fuse *fuse; -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; - -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Process ID of the calling thread */ -- pid_t pid; -+ /** Process ID of the calling thread */ -+ pid_t pid; - -- /** Private filesystem data */ -- void *private_data; -+ /** Private filesystem data */ -+ void *private_data; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - /** -@@ -859,15 +880,15 @@ struct fuse_context { - * Example usage, see hello.c - */ - /* -- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -- void *private_data); --*/ --#define fuse_main(argc, argv, op, private_data) \ -- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ * void *private_data); -+ */ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) - --/* ----------------------------------------------------------- * -- * More detailed API * -- * 
----------------------------------------------------------- */ -+/* -+ * More detailed API -+ */ - - /** - * Print available options (high- and low-level) to stdout. This is -@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); - * @return the created FUSE handle - */ - #if FUSE_USE_VERSION == 30 --struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+struct fuse *fuse_new_30(struct fuse_args *args, -+ const struct fuse_operations *op, size_t op_size, -+ void *private_data); - #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) - #else - struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - #endif - - /** -@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); - /** - * Destroy the FUSE handle. - * -- * NOTE: This function does not unmount the filesystem. If this is -+ * NOTE: This function does not unmount the filesystem. If this is - * needed, call fuse_unmount() before calling this function. - * - * @param f the FUSE handle -@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); - * Do not call this directly, use fuse_main() - */ - int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - - /** - * Start the cleanup thread when using option "remember". 
-@@ -1081,89 +1103,87 @@ struct fuse_fs; - */ - - int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -- struct fuse_file_info *fi); --int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -- const char *newpath, unsigned int flags); -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, -+ unsigned int flags); - int fuse_fs_unlink(struct fuse_fs *fs, const char *path); - int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); --int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -- const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); - int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); --int fuse_fs_release(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); - int fuse_fs_open(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -- off_t off, struct fuse_file_info *fi); -+ off_t off, struct fuse_file_info *fi); - int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec **bufp, size_t size, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -+ size_t size, off_t off, struct fuse_file_info *fi); - int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); - int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -- fuse_fill_dir_t filler, off_t off, -- struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); - int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_lock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int cmd, struct flock *lock); -+ struct fuse_file_info *fi, int cmd, struct flock *lock); - int fuse_fs_flock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int op); -+ struct fuse_file_info *fi, int op); - int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -- const struct timespec tv[2], struct fuse_file_info *fi); -+ const struct timespec tv[2], struct fuse_file_info *fi); - int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); - int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, -- size_t len); -+ size_t len); - int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -- dev_t rdev); -+ dev_t rdev); - int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); - int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -- const char *value, size_t size, int flags); -+ const char *value, size_t size, int flags); - int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -- char *value, size_t size); -+ char *value, size_t size); - int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -- size_t size); --int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -- const char *name); -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); - int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -- uint64_t *idx); -+ uint64_t *idx); - int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned int flags, -- void *data); -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); - int fuse_fs_poll(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, struct fuse_pollhandle *ph, -- unsigned *reventsp); -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); - int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -+ off_t offset, off_t length, struct fuse_file_info *fi); - ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -- struct fuse_file_info *fi_in, off_t off_in, -- const char *path_out, -- struct fuse_file_info *fi_out, off_t off_out, -- size_t len, int flags); -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); - off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -- struct fuse_config *cfg); -+ struct fuse_config *cfg); - void fuse_fs_destroy(struct fuse_fs *fs); - - int fuse_notify_poll(struct fuse_pollhandle *ph); -@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); - * @return a new filesystem object - */ - struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -- void *private_data); -+ void *private_data); - - /** - * Factory for creating filesystem objects -@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, - * @return the new filesystem object - */ - typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -- struct fuse_fs *fs[]); -+ struct fuse_fs *fs[]); - /** - * Register filesystem module - * -@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, - * @param factory_ the factory function for this filesystem module - */ - #define FUSE_REGISTER_MODULE(name_, factory_) \ -- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ fuse_module_factory_t fuse_module_##name_##_factory = factory_ - - /** Get session from fuse object */ - struct fuse_session *fuse_get_session(struct fuse *f); -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bf8f8cc..bd9bf86 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -1,21 +1,23 @@ --/* FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+/* -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - /** @file */ - - #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) --#error "Never include directly; use or instead." -+#error \ -+ "Never include directly; use or instead." - #endif - - #ifndef FUSE_COMMON_H_ - #define FUSE_COMMON_H_ - --#include "fuse_opt.h" - #include "fuse_log.h" -+#include "fuse_opt.h" - #include - #include - -@@ -25,7 +27,7 @@ - /** Minor version of FUSE library interface */ - #define FUSE_MINOR_VERSION 2 - --#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - - /** -@@ -38,67 +40,83 @@ - * descriptors can share a single file handle. - */ - struct fuse_file_info { -- /** Open flags. Available in open() and release() */ -- int flags; -- -- /** In case of a write operation indicates if this was caused -- by a delayed write from the page cache. If so, then the -- context's pid, uid, and gid fields will not be valid, and -- the *fh* value may not match the *fh* value that would -- have been sent with the corresponding individual write -- requests if write caching had been disabled. */ -- unsigned int writepage : 1; -- -- /** Can be filled in by open, to use direct I/O on this file. */ -- unsigned int direct_io : 1; -- -- /** Can be filled in by open. It signals the kernel that any -- currently cached file data (ie., data that the filesystem -- provided the last time the file was open) need not be -- invalidated. Has no effect when set in other contexts (in -- particular it does nothing when set by opendir()). */ -- unsigned int keep_cache : 1; -- -- /** Indicates a flush operation. Set in flush operation, also -- maybe set in highlevel lock operation and lowlevel release -- operation. */ -- unsigned int flush : 1; -- -- /** Can be filled in by open, to indicate that the file is not -- seekable. 
*/ -- unsigned int nonseekable : 1; -- -- /* Indicates that flock locks for this file should be -- released. If set, lock_owner shall contain a valid value. -- May only be set in ->release(). */ -- unsigned int flock_release : 1; -- -- /** Can be filled in by opendir. It signals the kernel to -- enable caching of entries returned by readdir(). Has no -- effect when set in other contexts (in particular it does -- nothing when set by open()). */ -- unsigned int cache_readdir : 1; -- -- /** Padding. Reserved for future use*/ -- unsigned int padding : 25; -- unsigned int padding2 : 32; -- -- /** File handle id. May be filled in by filesystem in create, -- * open, and opendir(). Available in most other file operations on the -- * same file handle. */ -- uint64_t fh; -- -- /** Lock owner id. Available in locking operations and flush */ -- uint64_t lock_owner; -- -- /** Requested poll events. Available in ->poll. Only set on kernels -- which support it. If unsupported, this field is set to zero. */ -- uint32_t poll_events; -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /* -+ * In case of a write operation indicates if this was caused -+ * by a delayed write from the page cache. If so, then the -+ * context's pid, uid, and gid fields will not be valid, and -+ * the *fh* value may not match the *fh* value that would -+ * have been sent with the corresponding individual write -+ * requests if write caching had been disabled. -+ */ -+ unsigned int writepage:1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io:1; -+ -+ /* -+ * Can be filled in by open. It signals the kernel that any -+ * currently cached file data (ie., data that the filesystem -+ * provided the last time the file was open) need not be -+ * invalidated. Has no effect when set in other contexts (in -+ * particular it does nothing when set by opendir()). -+ */ -+ unsigned int keep_cache:1; -+ -+ /* -+ * Indicates a flush operation. 
Set in flush operation, also -+ * maybe set in highlevel lock operation and lowlevel release -+ * operation. -+ */ -+ unsigned int flush:1; -+ -+ /* -+ * Can be filled in by open, to indicate that the file is not -+ * seekable. -+ */ -+ unsigned int nonseekable:1; -+ -+ /* -+ * Indicates that flock locks for this file should be -+ * released. If set, lock_owner shall contain a valid value. -+ * May only be set in ->release(). -+ */ -+ unsigned int flock_release:1; -+ -+ /* -+ * Can be filled in by opendir. It signals the kernel to -+ * enable caching of entries returned by readdir(). Has no -+ * effect when set in other contexts (in particular it does -+ * nothing when set by open()). -+ */ -+ unsigned int cache_readdir:1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding:25; -+ unsigned int padding2:32; -+ -+ /* -+ * File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. -+ */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /* -+ * Requested poll events. Available in ->poll. Only set on kernels -+ * which support it. If unsupported, this field is set to zero. -+ */ -+ uint32_t poll_events; - }; - --/************************************************************************** -- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -- **************************************************************************/ -+/* -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' -+ */ - - /** - * Indicates that the filesystem supports asynchronous read requests. -@@ -110,7 +128,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_READ (1 << 0) -+#define FUSE_CAP_ASYNC_READ (1 << 0) - - /** - * Indicates that the filesystem supports "remote" locking. 
-@@ -118,7 +136,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel, - * and if getlk() and setlk() handlers are implemented. - */ --#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) - - /** - * Indicates that the filesystem supports the O_TRUNC open flag. If -@@ -127,14 +145,14 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) - - /** - * Indicates that the filesystem supports lookups of "." and "..". - * - * This feature is disabled by default. - */ --#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) - - /** - * Indicates that the kernel should not apply the umask to the -@@ -142,7 +160,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_DONT_MASK (1 << 6) -+#define FUSE_CAP_DONT_MASK (1 << 6) - - /** - * Indicates that libfuse should try to use splice() when writing to -@@ -150,7 +168,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) - - /** - * Indicates that libfuse should try to move pages instead of copying when -@@ -158,7 +176,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) - - /** - * Indicates that libfuse should try to use splice() when reading from -@@ -167,7 +185,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a write_buf() handler. 
- */ --#define FUSE_CAP_SPLICE_READ (1 << 9) -+#define FUSE_CAP_SPLICE_READ (1 << 9) - - /** - * If set, the calls to flock(2) will be emulated using POSIX locks and must -@@ -180,14 +198,14 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a flock() handler. - */ --#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) - - /** - * Indicates that the filesystem supports ioctl's on directories. - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_IOCTL_DIR (1 << 11) -+#define FUSE_CAP_IOCTL_DIR (1 << 11) - - /** - * Traditionally, while a file is open the FUSE kernel module only -@@ -209,7 +227,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) - - /** - * Indicates that the filesystem supports readdirplus. -@@ -217,7 +235,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and if the - * filesystem implements a readdirplus() handler. - */ --#define FUSE_CAP_READDIRPLUS (1 << 13) -+#define FUSE_CAP_READDIRPLUS (1 << 13) - - /** - * Indicates that the filesystem supports adaptive readdirplus. -@@ -245,7 +263,7 @@ struct fuse_file_info { - * if the filesystem implements both a readdirplus() and a readdir() - * handler. - */ --#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) - - /** - * Indicates that the filesystem supports asynchronous direct I/O submission. -@@ -256,7 +274,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_DIO (1 << 15) -+#define FUSE_CAP_ASYNC_DIO (1 << 15) - - /** - * Indicates that writeback caching should be enabled. 
This means that -@@ -265,7 +283,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) - - /** - * Indicates support for zero-message opens. If this flag is set in -@@ -278,7 +296,7 @@ struct fuse_file_info { - * Setting (or unsetting) this flag in the `want` field has *no - * effect*. - */ --#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) - - /** - * Indicates support for parallel directory operations. If this flag -@@ -288,7 +306,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) - - /** - * Indicates support for POSIX ACLs. -@@ -307,7 +325,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_POSIX_ACL (1 << 19) -+#define FUSE_CAP_POSIX_ACL (1 << 19) - - /** - * Indicates that the filesystem is responsible for unsetting -@@ -316,7 +334,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) - - /** - * Indicates support for zero-message opendirs. If this flag is set in -@@ -328,7 +346,7 @@ struct fuse_file_info { - * - * Setting (or unsetting) this flag in the `want` field has *no effect*. 
- */ --#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) - - /** - * Ioctl flags -@@ -340,12 +358,12 @@ struct fuse_file_info { - * - * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs - */ --#define FUSE_IOCTL_COMPAT (1 << 0) --#define FUSE_IOCTL_UNRESTRICTED (1 << 1) --#define FUSE_IOCTL_RETRY (1 << 2) --#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) - --#define FUSE_IOCTL_MAX_IOV 256 -+#define FUSE_IOCTL_MAX_IOV 256 - - /** - * Connection information, passed to the ->init() method -@@ -355,114 +373,114 @@ struct fuse_file_info { - * value must usually be smaller than the indicated value. - */ - struct fuse_conn_info { -- /** -- * Major version of the protocol (read-only) -- */ -- unsigned proto_major; -- -- /** -- * Minor version of the protocol (read-only) -- */ -- unsigned proto_minor; -- -- /** -- * Maximum size of the write buffer -- */ -- unsigned max_write; -- -- /** -- * Maximum size of read requests. A value of zero indicates no -- * limit. However, even if the filesystem does not specify a -- * limit, the maximum size of read requests will still be -- * limited by the kernel. -- * -- * NOTE: For the time being, the maximum size of read requests -- * must be set both here *and* passed to fuse_session_new() -- * using the ``-o max_read=`` mount option. At some point -- * in the future, specifying the mount option will no longer -- * be necessary. -- */ -- unsigned max_read; -- -- /** -- * Maximum readahead -- */ -- unsigned max_readahead; -- -- /** -- * Capability flags that the kernel supports (read-only) -- */ -- unsigned capable; -- -- /** -- * Capability flags that the filesystem wants to enable. -- * -- * libfuse attempts to initialize this field with -- * reasonable default values before calling the init() handler. 
-- */ -- unsigned want; -- -- /** -- * Maximum number of pending "background" requests. A -- * background request is any type of request for which the -- * total number is not limited by other means. As of kernel -- * 4.8, only two types of requests fall into this category: -- * -- * 1. Read-ahead requests -- * 2. Asynchronous direct I/O requests -- * -- * Read-ahead requests are generated (if max_readahead is -- * non-zero) by the kernel to preemptively fill its caches -- * when it anticipates that userspace will soon read more -- * data. -- * -- * Asynchronous direct I/O requests are generated if -- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -- * direct I/O request. In this case the kernel will internally -- * split it up into multiple smaller requests and submit them -- * to the filesystem concurrently. -- * -- * Note that the following requests are *not* background -- * requests: writeback requests (limited by the kernel's -- * flusher algorithm), regular (i.e., synchronous and -- * buffered) userspace read/write requests (limited to one per -- * thread), asynchronous read requests (Linux's io_submit(2) -- * call actually blocks, so these are also limited to one per -- * thread). -- */ -- unsigned max_background; -- -- /** -- * Kernel congestion threshold parameter. If the number of pending -- * background requests exceeds this number, the FUSE kernel module will -- * mark the filesystem as "congested". This instructs the kernel to -- * expect that queued requests will take some time to complete, and to -- * adjust its algorithms accordingly (e.g. by putting a waiting thread -- * to sleep instead of using a busy-loop). -- */ -- unsigned congestion_threshold; -- -- /** -- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -- * for updating mtime and ctime when write requests are received. The -- * updated values are passed to the filesystem with setattr() requests. 
-- * However, if the filesystem does not support the full resolution of -- * the kernel timestamps (nanoseconds), the mtime and ctime values used -- * by kernel and filesystem will differ (and result in an apparent -- * change of times after a cache flush). -- * -- * To prevent this problem, this variable can be used to inform the -- * kernel about the timestamp granularity supported by the file-system. -- * The value should be power of 10. The default is 1, i.e. full -- * nano-second resolution. Filesystems supporting only second resolution -- * should set this to 1000000000. -- */ -- unsigned time_gran; -- -- /** -- * For future use. -- */ -- unsigned reserved[22]; -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. -+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. 
A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; - }; - - struct fuse_session; -@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; - * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want - * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want - * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -- * -o time_gran=N sets conn->time_gran -+ * -o no_remote_lock Equivalent to -o -+ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets -+ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets -+ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets -+ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets -+ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets -+ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets -+ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets -+ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets -+ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o -+ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO -+ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in -+ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in -+ *conn->want -o time_gran=N sets conn->time_gran - * - * Known options will be removed from *args*, unknown options will be - * passed through unchanged. -@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; - * @param args argument vector (input+output) - * @return parsed options - **/ --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); - - /** - * This function applies the (parsed) parameters in *opts* to the -@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); - * option has been explicitly set. 
- */ - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn); -+ struct fuse_conn_info *conn); - - /** - * Go into the background -@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); - */ - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); - --/* ----------------------------------------------------------- * -- * Data buffer * -- * ----------------------------------------------------------- */ -+/* -+ * Data buffer -+ */ - - /** - * Buffer flags - */ - enum fuse_buf_flags { -- /** -- * Buffer contains a file descriptor -- * -- * If this flag is set, the .fd field is valid, otherwise the -- * .mem fields is valid. -- */ -- FUSE_BUF_IS_FD = (1 << 1), -- -- /** -- * Seek on the file descriptor -- * -- * If this flag is set then the .pos field is valid and is -- * used to seek to the given offset before performing -- * operation on file descriptor. -- */ -- FUSE_BUF_FD_SEEK = (1 << 2), -- -- /** -- * Retry operation on file descriptor -- * -- * If this flag is set then retry operation on file descriptor -- * until .size bytes have been copied or an error or EOF is -- * detected. -- */ -- FUSE_BUF_FD_RETRY = (1 << 3), -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. 
-+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), - }; - - /** - * Buffer copy flags - */ - enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. -+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. 
-+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), - }; - - /** -@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { - * be supplied as a memory pointer or as a file descriptor - */ - struct fuse_buf { -- /** -- * Size of data in bytes -- */ -- size_t size; -- -- /** -- * Buffer flags -- */ -- enum fuse_buf_flags flags; -- -- /** -- * Memory pointer -- * -- * Used unless FUSE_BUF_IS_FD flag is set. -- */ -- void *mem; -- -- /** -- * File descriptor -- * -- * Used if FUSE_BUF_IS_FD flag is set. -- */ -- int fd; -- -- /** -- * File position -- * -- * Used if FUSE_BUF_FD_SEEK flag is set. -- */ -- off_t pos; -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; - }; - - /** -@@ -677,41 +694,39 @@ struct fuse_buf { - * Allocate dynamically to add more than one buffer. 
- */ - struct fuse_bufvec { -- /** -- * Number of buffers in the array -- */ -- size_t count; -- -- /** -- * Index of current buffer within the array -- */ -- size_t idx; -- -- /** -- * Current offset within the current buffer -- */ -- size_t off; -- -- /** -- * Array of buffers -- */ -- struct fuse_buf buf[1]; -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; - }; - - /* Initialize bufvec with a single buffer of given size */ --#define FUSE_BUFVEC_INIT(size__) \ -- ((struct fuse_bufvec) { \ -- /* .count= */ 1, \ -- /* .idx = */ 0, \ -- /* .off = */ 0, \ -- /* .buf = */ { /* [0] = */ { \ -- /* .size = */ (size__), \ -- /* .flags = */ (enum fuse_buf_flags) 0, \ -- /* .mem = */ NULL, \ -- /* .fd = */ -1, \ -- /* .pos = */ 0, \ -- } } \ -- } ) -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec){ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, /* .buf = */ \ -+ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags)0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } }) - - /** - * Get total size of data in a fuse buffer vector -@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * @return actual number of bytes copied or -errno on error - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - --/* ----------------------------------------------------------- * -- * Signal handling * -- * ----------------------------------------------------------- */ -+/* -+ * Signal handling -+ */ - - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * -- * Stores session in a global variable. 
May only be called once per -+ * Stores session in a global variable. May only be called once per - * process until fuse_remove_signal_handlers() is called. - * - * Once either of the POSIX signals arrives, the signal handler calls -@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); - */ - void fuse_remove_signal_handlers(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Compatibility stuff * -- * ----------------------------------------------------------- */ -+/* -+ * Compatibility stuff -+ */ - - #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 --# error only API version 30 or greater is supported -+#error only API version 30 or greater is supported - #endif - - -@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! - */ - --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); - #else --struct _fuse_off_t_must_be_64bit_dummy_struct \ -- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+struct _fuse_off_t_must_be_64bit_dummy_struct { -+ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); -+}; - #endif - - #endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index b39522e..e63cb58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -1,71 +1,71 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. 
-- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "fuse.h" - #include "fuse_lowlevel.h" - - struct fuse_req { -- struct fuse_session *se; -- uint64_t unique; -- int ctr; -- pthread_mutex_t lock; -- struct fuse_ctx ctx; -- struct fuse_chan *ch; -- int interrupted; -- unsigned int ioctl_64bit : 1; -- union { -- struct { -- uint64_t unique; -- } i; -- struct { -- fuse_interrupt_func_t func; -- void *data; -- } ni; -- } u; -- struct fuse_req *next; -- struct fuse_req *prev; -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit:1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; - }; - - struct fuse_notify_req { -- uint64_t unique; -- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -- const void *, const struct fuse_buf *); -- struct fuse_notify_req *next; -- struct fuse_notify_req *prev; -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; - }; - - struct fuse_session { -- char *mountpoint; -- volatile int exited; -- int fd; -- int debug; -- int deny_others; -- struct fuse_lowlevel_ops op; -- int got_init; -- struct cuse_data *cuse_data; -- void *userdata; -- uid_t owner; -- struct fuse_conn_info conn; -- struct fuse_req list; -- struct fuse_req interrupts; -- pthread_mutex_t lock; -- int got_destroy; -- int broken_splice_nonblock; -- uint64_t notify_ctr; -- struct fuse_notify_req notify_list; -- size_t bufsize; -- int error; -+ char *mountpoint; -+ volatile int exited; -+ int 
fd; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; - }; - - struct fuse_chan { -- pthread_mutex_t lock; -- int ctr; -- int fd; -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; - }; - - /** -@@ -76,19 +76,20 @@ struct fuse_chan { - * - */ - struct fuse_module { -- char *name; -- fuse_module_factory_t factory; -- struct fuse_module *next; -- struct fusemod_so *so; -- int ctr; -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; - }; - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count); -+ int count); - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch); -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch); - - - #define FUSE_MAX_MAX_PAGES 256 -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 0d268ab..11345f9 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -1,40 +1,40 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- Logging API. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * Logging API. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include "fuse_log.h" - - #include - #include - --static void default_log_func( -- __attribute__(( unused )) enum fuse_log_level level, -- const char *fmt, va_list ap) -+static void default_log_func(__attribute__((unused)) enum fuse_log_level level, -+ const char *fmt, va_list ap) - { -- vfprintf(stderr, fmt, ap); -+ vfprintf(stderr, fmt, ap); - } - - static fuse_log_func_t log_func = default_log_func; - - void fuse_set_log_func(fuse_log_func_t func) - { -- if (!func) -- func = default_log_func; -+ if (!func) { -+ func = default_log_func; -+ } - -- log_func = func; -+ log_func = func; - } - - void fuse_log(enum fuse_log_level level, const char *fmt, ...) - { -- va_list ap; -+ va_list ap; - -- va_start(ap, fmt); -- log_func(level, fmt, ap); -- va_end(ap); -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); - } -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 0af700d..bf6c11f 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOG_H_ - #define FUSE_LOG_H_ -@@ -22,14 +22,14 @@ - * These levels correspond to syslog(2) log levels since they are widely used. 
- */ - enum fuse_log_level { -- FUSE_LOG_EMERG, -- FUSE_LOG_ALERT, -- FUSE_LOG_CRIT, -- FUSE_LOG_ERR, -- FUSE_LOG_WARNING, -- FUSE_LOG_NOTICE, -- FUSE_LOG_INFO, -- FUSE_LOG_DEBUG -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG - }; - - /** -@@ -45,8 +45,8 @@ enum fuse_log_level { - * @param fmt sprintf-style format string including newline - * @param ap format string arguments - */ --typedef void (*fuse_log_func_t)(enum fuse_log_level level, -- const char *fmt, va_list ap); -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, -+ va_list ap); - - /** - * Install a custom log handler function. -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index e6fa247..5c9cb52 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1,2380 +1,2515 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of (most of) the low-level FUSE API. The session loop -- functions are implemented in separate files. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of (most of) the low-level FUSE API. The session loop -+ * functions are implemented in separate files. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_kernel.h" --#include "fuse_opt.h" - #include "fuse_misc.h" -+#include "fuse_opt.h" - -+#include -+#include -+#include -+#include - #include - #include --#include - #include --#include --#include --#include --#include - #include -- -+#include - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) ({ \ -- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -- (type *)( (char *)__mptr - offsetof(type,member) );}) -+#define container_of(ptr, type, member) \ -+ ({ \ -+ const typeof(((type *)0)->member) *__mptr = (ptr); \ -+ (type *)((char *)__mptr - offsetof(type, member)); \ -+ }) - - struct fuse_pollhandle { -- uint64_t kh; -- struct fuse_session *se; -+ uint64_t kh; -+ struct fuse_session *se; - }; - - static size_t pagesize; - - static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - { -- pagesize = getpagesize(); -+ pagesize = getpagesize(); - } - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ 
attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) - { -- stbuf->st_mode = attr->mode; -- stbuf->st_uid = attr->uid; -- stbuf->st_gid = attr->gid; -- stbuf->st_size = attr->size; -- stbuf->st_atime = attr->atime; -- stbuf->st_mtime = attr->mtime; -- stbuf->st_ctime = attr->ctime; -- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); - } - --static size_t iov_length(const struct iovec *iov, size_t count) -+static size_t iov_length(const struct iovec *iov, size_t count) - { -- size_t seg; -- size_t ret = 0; -+ size_t seg; -+ size_t ret = 0; - -- for (seg = 0; seg < count; seg++) -- ret += iov[seg].iov_len; -- return ret; -+ for (seg = 0; seg < count; seg++) { -+ ret += iov[seg].iov_len; -+ } -+ return ret; - } - - static void list_init_req(struct fuse_req *req) - { -- req->next = req; -- req->prev = req; -+ req->next = req; -+ req->prev = req; - } - - static void list_del_req(struct fuse_req *req) - { -- struct fuse_req *prev = req->prev; -- struct fuse_req *next = req->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; - } - - static void list_add_req(struct fuse_req *req, struct fuse_req *next) - { -- struct fuse_req *prev = next->prev; -- req->next = next; 
-- req->prev = prev; -- prev->next = req; -- next->prev = req; -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; - } - - static void destroy_req(fuse_req_t req) - { -- pthread_mutex_destroy(&req->lock); -- free(req); -+ pthread_mutex_destroy(&req->lock); -+ free(req); - } - - void fuse_free_req(fuse_req_t req) - { -- int ctr; -- struct fuse_session *se = req->se; -+ int ctr; -+ struct fuse_session *se = req->se; - -- pthread_mutex_lock(&se->lock); -- req->u.ni.func = NULL; -- req->u.ni.data = NULL; -- list_del_req(req); -- ctr = --req->ctr; -- req->ch = NULL; -- pthread_mutex_unlock(&se->lock); -- if (!ctr) -- destroy_req(req); -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) { -+ destroy_req(req); -+ } - } - - static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) - { -- struct fuse_req *req; -+ struct fuse_req *req; - -- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -- if (req == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -- } else { -- req->se = se; -- req->ctr = 1; -- list_init_req(req); -- fuse_mutex_init(&req->lock); -- } -+ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } - -- return req; -+ return req; - } - - /* Send data. 
If *ch* is NULL, send via session master fd */ - static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_out_header *out = iov[0].iov_base; - -- out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -- out->error, out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long) out->unique, out->error, -- strerror(-out->error), out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i\n", -- (unsigned long long) out->unique, out->len); -- } -- } -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); -+ } -+ } - -- abort(); /* virtio should have taken it before here */ -- return 0; -+ abort(); /* virtio should have taken it before here */ -+ return 0; - } - - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (error <= -1000 || error > 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -- } -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } - -- out.unique = req->unique; -- out.error = error; -+ out.unique = req->unique; -+ out.error = error; - -- 
iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(req->se, req->ch, iov, count); -+ return fuse_send_msg(req->se, req->ch, iov, count); - } - - static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- int res; -+ int res; - -- res = fuse_send_reply_iov_nofree(req, error, iov, count); -- fuse_free_req(req); -- return res; -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; - } - - static int send_reply(fuse_req_t req, int error, const void *arg, -- size_t argsize) -+ size_t argsize) - { -- struct iovec iov[2]; -- int count = 1; -- if (argsize) { -- iov[1].iov_base = (void *) arg; -- iov[1].iov_len = argsize; -- count++; -- } -- return send_reply_iov(req, error, iov, count); -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *)arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); - } - - int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) - { -- int res; -- struct iovec *padded_iov; -+ int res; -+ struct iovec *padded_iov; - -- padded_iov = malloc((count + 1) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -- count++; -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; - -- res = send_reply_iov(req, 0, padded_iov, count); -- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); - -- return res; -+ return res; - } - - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * 'buf` is allowed to be empty 
so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, off_t off) -+ const char *name, const struct stat *stbuf, off_t off) - { -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- struct fuse_dirent *dirent; -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; - -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); - -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ return entlen_padded; -+ } - -- dirent = (struct fuse_dirent*) buf; -- dirent->ino = stbuf->st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ dirent = (struct fuse_dirent *)buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); - -- return entlen_padded; -+ return entlen_padded; - } - - static void convert_statfs(const struct statvfs *stbuf, -- struct fuse_kstatfs *kstatfs) -+ struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = 
stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; - } - - static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) - { -- return send_reply(req, 0, arg, argsize); -+ return send_reply(req, 0, arg, argsize); - } - - int fuse_reply_err(fuse_req_t req, int err) - { -- return send_reply(req, -err, NULL, 0); -+ return send_reply(req, -err, NULL, 0); - } - - void fuse_reply_none(fuse_req_t req) - { -- fuse_free_req(req); -+ fuse_free_req(req); - } - - static unsigned long calc_timeout_sec(double t) - { -- if (t > (double) ULONG_MAX) -- return ULONG_MAX; -- else if (t < 0.0) -- return 0; -- else -- return (unsigned long) t; -+ if (t > (double)ULONG_MAX) { -+ return ULONG_MAX; -+ } else if (t < 0.0) { -+ return 0; -+ } else { -+ return (unsigned long)t; -+ } - } - - static unsigned int calc_timeout_nsec(double t) - { -- double f = t - (double) calc_timeout_sec(t); -- if (f < 0.0) -- return 0; -- else if (f >= 0.999999999) -- return 999999999; -- else -- return (unsigned int) (f * 1.0e9); -+ double f = t - (double)calc_timeout_sec(t); -+ if (f < 0.0) { -+ return 0; -+ } else if (f >= 0.999999999) { -+ return 999999999; -+ } else { -+ return (unsigned int)(f * 1.0e9); -+ } - } - - static void fill_entry(struct fuse_entry_out *arg, -- const struct fuse_entry_param *e) -+ const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -- convert_stat(&e->attr, &arg->attr); -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); - } - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * `buf` is allowed to be empty so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off) --{ -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -- -- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -- fill_entry(&dp->entry_out, e); -- -- struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -- -- return entlen_padded; --} -- --static void fill_open(struct fuse_open_out *arg, -- const struct fuse_file_info *f) --{ -- arg->fh = f->fh; -- if (f->direct_io) -- arg->open_flags |= FOPEN_DIRECT_IO; -- if (f->keep_cache) -- arg->open_flags |= FOPEN_KEEP_CACHE; -- if (f->cache_readdir) -- arg->open_flags |= FOPEN_CACHE_DIR; -- if (f->nonseekable) -- arg->open_flags |= FOPEN_NONSEEKABLE; -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ 
return entlen_padded; -+ } -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) { -+ arg->open_flags |= FOPEN_DIRECT_IO; -+ } -+ if (f->keep_cache) { -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ } -+ if (f->cache_readdir) { -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ } -+ if (f->nonseekable) { -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+ } - } - - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { -- struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(arg); - -- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- negative entry */ -- if (!e->ino && req->se->conn.proto_minor < 4) -- return fuse_reply_err(req, ENOENT); -+ /* -+ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ * negative entry -+ */ -+ if (!e->ino && req->se->conn.proto_minor < 4) { -+ return fuse_reply_err(req, ENOENT); -+ } - -- memset(&arg, 0, sizeof(arg)); -- fill_entry(&arg, e); -- return send_reply_ok(req, &arg, size); -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *f) -+ const struct fuse_file_info *f) - { -- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -- memset(buf, 0, sizeof(buf)); -- fill_entry(earg, e); -- fill_open(oarg, f); -- return send_reply_ok(req, buf, -- entrysize + sizeof(struct fuse_open_out)); -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); - } - - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout) -+ double attr_timeout) - { -- struct fuse_attr_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? 
-- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ struct fuse_attr_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- arg.attr_valid = calc_timeout_sec(attr_timeout); -- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -- convert_stat(attr, &arg.attr); -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_readlink(fuse_req_t req, const char *linkname) - { -- return send_reply_ok(req, linkname, strlen(linkname)); -+ return send_reply_ok(req, linkname, strlen(linkname)); - } - - int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) - { -- struct fuse_open_out arg; -+ struct fuse_open_out arg; - -- memset(&arg, 0, sizeof(arg)); -- fill_open(&arg, f); -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_write(fuse_req_t req, size_t count) - { -- struct fuse_write_out arg; -+ struct fuse_write_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) - { -- return send_reply_ok(req, buf, size); -+ return send_reply_ok(req, buf, size); - } - - static int fuse_send_data_iov_fallback(struct fuse_session *se, -- struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, -- size_t len) -+ struct fuse_chan *ch, struct iovec *iov, -+ int iov_count, struct fuse_bufvec *buf, -+ size_t len) - { -- /* Optimize common case */ -- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && -- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -- /* FIXME: also avoid memory copy if there are multiple buffers -- but none of them contain an fd */ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* -+ * FIXME: also avoid memory copy if there are multiple buffers -+ * but none of them contain an fd -+ */ - -- iov[iov_count].iov_base = buf->buf[0].mem; -- iov[iov_count].iov_len = len; -- iov_count++; -- return fuse_send_msg(se, ch, iov, iov_count); -- } -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } - -- abort(); /* Will have taken vhost path */ -- return 0; -+ abort(); /* Will have taken vhost path */ -+ return 0; - } - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) - { -- size_t len = fuse_buf_size(buf); -- (void) flags; -+ size_t len = fuse_buf_size(buf); -+ (void)flags; - -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- struct iovec iov[2]; -- struct fuse_out_header out; -- int res; -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -+ out.unique = req->unique; -+ out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -- if (res <= 0) { -- fuse_free_req(req); -- return 
res; -- } else { -- return fuse_reply_err(req, res); -- } -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } - } - - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { -- struct fuse_statfs_out arg; -- size_t size = req->se->conn.proto_minor < 4 ? -- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ struct fuse_statfs_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- convert_statfs(stbuf, &arg.st); -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_xattr(fuse_req_t req, size_t count) - { -- struct fuse_getxattr_out arg; -+ struct fuse_getxattr_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lock(fuse_req_t req, const struct flock *lock) - { -- struct fuse_lk_out arg; -+ struct fuse_lk_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.lk.type = lock->l_type; -- if (lock->l_type != F_UNLCK) { -- arg.lk.start = lock->l_start; -- if (lock->l_len == 0) -- arg.lk.end = OFFSET_MAX; -- else -- arg.lk.end = lock->l_start + lock->l_len - 1; -- } -- arg.lk.pid = lock->l_pid; -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) { -+ arg.lk.end = OFFSET_MAX; -+ } else { -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_bmap(fuse_req_t req, uint64_t idx) - { -- struct fuse_bmap_out 
arg; -+ struct fuse_bmap_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.block = idx; -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -- size_t count) --{ -- struct fuse_ioctl_iovec *fiov; -- size_t i; -- -- fiov = malloc(sizeof(fiov[0]) * count); -- if (!fiov) -- return NULL; -- -- for (i = 0; i < count; i++) { -- fiov[i].base = (uintptr_t) iov[i].iov_base; -- fiov[i].len = iov[i].iov_len; -- } -- -- return fiov; --} -- --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count) --{ -- struct fuse_ioctl_out arg; -- struct fuse_ioctl_iovec *in_fiov = NULL; -- struct fuse_ioctl_iovec *out_fiov = NULL; -- struct iovec iov[4]; -- size_t count = 1; -- int res; -- -- memset(&arg, 0, sizeof(arg)); -- arg.flags |= FUSE_IOCTL_RETRY; -- arg.in_iovs = in_count; -- arg.out_iovs = out_count; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -- -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -- -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -- } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) -- goto enomem; -- -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -- } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) -- goto enomem; -- -- iov[count].iov_base = 
(void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -- } -- -- res = send_reply_iov(req, 0, iov, count); -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) { -+ return NULL; -+ } -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t)iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); - out: -- 
free(in_fiov); -- free(out_fiov); -+ free(in_fiov); -+ free(out_fiov); - -- return res; -+ return res; - - enomem: -- res = fuse_reply_err(req, ENOMEM); -- goto out; -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; - } - - int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) - { -- struct fuse_ioctl_out arg; -- struct iovec iov[3]; -- size_t count = 1; -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; - -- if (size) { -- iov[count].iov_base = (char *) buf; -- iov[count].iov_len = size; -- count++; -- } -+ if (size) { -+ iov[count].iov_base = (char *)buf; -+ iov[count].iov_len = size; -+ count++; -+ } - -- return send_reply_iov(req, 0, iov, count); -+ return send_reply_iov(req, 0, iov, count); - } - - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count) -+ int count) - { -- struct iovec *padded_iov; -- struct fuse_ioctl_out arg; -- int res; -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; - -- padded_iov = malloc((count + 2) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- padded_iov[1].iov_base = &arg; -- padded_iov[1].iov_len = sizeof(arg); -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); - -- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); - -- res = send_reply_iov(req, 0, padded_iov, count + 2); 
-- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); - -- return res; -+ return res; - } - - int fuse_reply_poll(fuse_req_t req, unsigned revents) - { -- struct fuse_poll_out arg; -+ struct fuse_poll_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.revents = revents; -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lseek(fuse_req_t req, off_t off) - { -- struct fuse_lseek_out arg; -+ struct fuse_lseek_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.offset = off; -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.lookup) -- req->se->op.lookup(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lookup) { -+ req->se->op.lookup(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; - -- if (req->se->op.forget) -- req->se->op.forget(req, nodeid, arg->nlookup); -- else -- fuse_reply_none(req); -+ if (req->se->op.forget) { -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg) -+ const void *inarg) - { -- struct fuse_batch_forget_in *arg = (void *) inarg; -- struct fuse_forget_one *param = (void *) PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg = (void *)inarg; -+ struct fuse_forget_one *param = (void *)PARAM(arg); -+ 
unsigned int i; - -- (void) nodeid; -+ (void)nodeid; - -- if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *) param); -- } else if (req->se->op.forget) { -- for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = ¶m[i]; -- struct fuse_req *dummy_req; -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *)param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = ¶m[i]; -+ struct fuse_req *dummy_req; - -- dummy_req = fuse_ll_alloc_req(req->se); -- if (dummy_req == NULL) -- break; -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == NULL) { -+ break; -+ } - -- dummy_req->unique = req->unique; -- dummy_req->ctx = req->ctx; -- dummy_req->ch = NULL; -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, -- forget->nlookup); -- } -- fuse_reply_none(req); -- } else { -- fuse_reply_none(req); -- } -+ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_file_info *fip = NULL; -- struct fuse_file_info fi; -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } - -- if (req->se->op.getattr) -- req->se->op.getattr(req, nodeid, fip); -- else -- 
fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getattr) { -+ req->se->op.getattr(req, nodeid, fip); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -- -- if (req->se->op.setattr) { -- struct fuse_file_info *fi = NULL; -- struct fuse_file_info fi_store; -- struct stat stbuf; -- memset(&stbuf, 0, sizeof(stbuf)); -- convert_attr(arg, &stbuf); -- if (arg->valid & FATTR_FH) { -- arg->valid &= ~FATTR_FH; -- memset(&fi_store, 0, sizeof(fi_store)); -- fi = &fi_store; -- fi->fh = arg->fh; -- } -- arg->valid &= -- FUSE_SET_ATTR_MODE | -- FUSE_SET_ATTR_UID | -- FUSE_SET_ATTR_GID | -- FUSE_SET_ATTR_SIZE | -- FUSE_SET_ATTR_ATIME | -- FUSE_SET_ATTR_MTIME | -- FUSE_SET_ATTR_ATIME_NOW | -- FUSE_SET_ATTR_MTIME_NOW | -- FUSE_SET_ATTR_CTIME; -- -- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; - -- if (req->se->op.access) -- 
req->se->op.access(req, nodeid, arg->mask); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.access) { -+ req->se->op.access(req, nodeid, arg->mask); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) inarg; -+ (void)inarg; - -- if (req->se->op.readlink) -- req->se->op.readlink(req, nodeid); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readlink) { -+ req->se->op.readlink(req, nodeid); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -+ char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ } - -- if (req->se->op.mknod) -- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mknod) { -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } - -- if (req->se->op.mkdir) -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mkdir) { -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ } else { -+ fuse_reply_err(req, ENOSYS); 
-+ } - } - - static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.unlink) -- req->se->op.unlink(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.unlink) { -+ req->se->op.unlink(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.rmdir) -- req->se->op.rmdir(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rmdir) { -+ req->se->op.rmdir(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ char *name = (char *)inarg; -+ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; - -- if (req->se->op.symlink) -- req->se->op.symlink(req, linkname, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.symlink) { -+ req->se->op.symlink(req, linkname, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- 0); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; - -- if (req->se->op.link) -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.link) { -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; - -- if (req->se->op.create) { -- struct fuse_file_info fi; -- char *name = PARAM(arg); -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + sizeof(struct fuse_open_in); -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + sizeof(struct fuse_open_in); -+ } - -- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
-- } else -- fuse_reply_err(req, ENOSYS); -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.open) -- req->se->op.open(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.open) { -+ req->se->op.open(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; - -- if (req->se->op.read) { -- struct fuse_file_info fi; -+ if (req->se->op.read) { -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- char *param; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ char *param; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } - -- if (req->se->op.write) -- req->se->op.write(req, nodeid, param, arg->size, -- arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.write) { -+ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -- -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- } -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- bufv.buf[0].size = arg->size; -- -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ 
const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) -- fi.lock_owner = arg->lock_owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) { -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.flush) -- req->se->op.flush(req, nodeid, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.flush) { -+ req->se->op.flush(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -- fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; -- } -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.release) -- req->se->op.release(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.release) { -+ req->se->op.release(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsync) -- req->se->op.fsync(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsync) { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.opendir) -- req->se->op.opendir(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.opendir) { -+ req->se->op.opendir(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdir) -- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdir) { -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdirplus) -- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdirplus) { -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct 
fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; - -- if (req->se->op.releasedir) -- req->se->op.releasedir(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.releasedir) { -+ req->se->op.releasedir(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsyncdir) -- req->se->op.fsyncdir(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsyncdir) { -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- if (req->se->op.statfs) -- req->se->op.statfs(req, nodeid); -- else { -- struct statvfs buf = { -- .f_namemax = 255, -- .f_bsize = 512, -- }; -- fuse_reply_statfs(req, &buf); -- } -+ if (req->se->op.statfs) { -+ req->se->op.statfs(req, nodeid); -+ } else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } - } - - static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; - -- if (req->se->op.setxattr) -- req->se->op.setxattr(req, nodeid, name, value, arg->size, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.setxattr) { -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.getxattr) -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getxattr) { -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.listxattr) -- req->se->op.listxattr(req, nodeid, arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.listxattr) { -+ req->se->op.listxattr(req, nodeid, arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.removexattr) -- req->se->op.removexattr(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.removexattr) { -+ req->se->op.removexattr(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void convert_fuse_file_lock(struct fuse_file_lock *fl, -- struct flock *flock) -+ struct flock *flock) - { -- memset(flock, 0, sizeof(struct flock)); -- flock->l_type = fl->type; -- flock->l_whence = SEEK_SET; -- flock->l_start = fl->start; -- if (fl->end == OFFSET_MAX) -- 
flock->l_len = 0; -- else -- flock->l_len = fl->end - fl->start + 1; -- flock->l_pid = fl->pid; -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) { -+ flock->l_len = 0; -+ } else { -+ flock->l_len = fl->end - fl->start + 1; -+ } -+ flock->l_pid = fl->pid; - } - - static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; - -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.getlk) -- req->se->op.getlk(req, nodeid, &fi, &flock); -- else -- fuse_reply_err(req, ENOSYS); -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) { -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) --{ -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -- -- if (arg->lk_flags & FUSE_LK_FLOCK) { -- int op = 0; -- -- switch (arg->lk.type) { -- case F_RDLCK: -- op = LOCK_SH; -- break; -- case F_WRLCK: -- op = LOCK_EX; -- break; -- case F_UNLCK: -- op = LOCK_UN; -- break; -- } -- if (!sleep) -- op |= LOCK_NB; -- -- if (req->se->op.flock) -- req->se->op.flock(req, nodeid, &fi, op); -- else -- fuse_reply_err(req, ENOSYS); -- } else { -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.setlk) -- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -- else -- 
fuse_reply_err(req, ENOSYS); -- } -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) { -+ op |= LOCK_NB; -+ } -+ -+ if (req->se->op.flock) { -+ req->se->op.flock(req, nodeid, &fi, op); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) { -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } - } - - static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, inarg, 0); - } - - static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, inarg, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - { -- struct fuse_req *curr; -- -- for (curr = se->list.next; curr != &se->list; curr = curr->next) { -- if (curr->unique == req->u.i.unique) { -- fuse_interrupt_func_t func; -- void *data; -- -- curr->ctr++; -- pthread_mutex_unlock(&se->lock); -- -- /* Ugh, ugly locking */ -- pthread_mutex_lock(&curr->lock); -- pthread_mutex_lock(&se->lock); -- curr->interrupted = 1; -- func = curr->u.ni.func; -- data = curr->u.ni.data; -- pthread_mutex_unlock(&se->lock); -- if (func) -- func(curr, data); -- pthread_mutex_unlock(&curr->lock); -- -- pthread_mutex_lock(&se->lock); -- curr->ctr--; -- if (!curr->ctr) -- destroy_req(curr); -- -- return 1; -- } -- } -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr 
= curr->next) { -- if (curr->u.i.unique == req->u.i.unique) -- return 1; -- } -- return 0; -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) { -+ func(curr, data); -+ } -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) { -+ destroy_req(curr); -+ } -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) { -+ return 1; -+ } -+ } -+ return 0; - } - - static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -- struct fuse_session *se = req->se; -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long) arg->unique); -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); -+ } - -- req->u.i.unique = arg->unique; -+ req->u.i.unique = arg->unique; - -- pthread_mutex_lock(&se->lock); -- if (find_interrupted(se, req)) -- destroy_req(req); -- else -- list_add_req(req, &se->interrupts); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) { -+ destroy_req(req); -+ } else { -+ list_add_req(req, &se->interrupts); -+ } -+ pthread_mutex_unlock(&se->lock); - } - - static struct fuse_req *check_interrupt(struct fuse_session *se, -- struct 
fuse_req *req) --{ -- struct fuse_req *curr; -- -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr = curr->next) { -- if (curr->u.i.unique == req->unique) { -- req->interrupted = 1; -- list_del_req(curr); -- free(curr); -- return NULL; -- } -- } -- curr = se->interrupts.next; -- if (curr != &se->interrupts) { -- list_del_req(curr); -- list_init_req(curr); -- return curr; -- } else -- return NULL; -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else { -+ return NULL; -+ } - } - - static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; - -- if (req->se->op.bmap) -- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.bmap) { -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -- struct fuse_file_info fi; -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; - -- if (flags & FUSE_IOCTL_DIR && -- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -- fuse_reply_err(req, ENOTTY); -- return; -- } -+ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -- req->ioctl_64bit = 1; -- } -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } - -- if (req->se->op.ioctl) -- req->se->op.ioctl(req, nodeid, arg->cmd, -- (void *)(uintptr_t)arg->arg, &fi, flags, -- in_buf, arg->in_size, arg->out_size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.ioctl) { -+ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, -+ &fi, flags, in_buf, arg->in_size, arg->out_size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - { -- free(ph); -+ free(ph); - } - - static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.poll_events = arg->events; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; - -- if (req->se->op.poll) { -- struct fuse_pollhandle *ph = NULL; -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; - -- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -- ph = malloc(sizeof(struct fuse_pollhandle)); -- if (ph == NULL) { -- fuse_reply_err(req, ENOMEM); -- return; -- } -- ph->kh = arg->kh; -- ph->se = req->se; -- } -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ 
ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } - -- req->se->op.poll(req, nodeid, &fi, ph); -- } else { -- fuse_reply_err(req, ENOSYS); -- } -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fallocate) -- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fallocate) { -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, -+ &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - --static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -+ const void *inarg) - { -- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -- struct fuse_file_info fi_in, fi_out; -+ struct fuse_copy_file_range_in *arg = -+ (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_file_info fi_in, fi_out; - -- memset(&fi_in, 0, sizeof(fi_in)); -- fi_in.fh = arg->fh_in; -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; - -- memset(&fi_out, 0, sizeof(fi_out)); -- fi_out.fh = arg->fh_out; -+ memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; - - -- if (req->se->op.copy_file_range) -- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -- &fi_in, arg->nodeid_out, -- arg->off_out, &fi_out, arg->len, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if 
(req->se->op.copy_file_range) { -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, -+ arg->nodeid_out, arg->off_out, &fi_out, -+ arg->len, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.lseek) -- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lseek) { -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -- struct fuse_init_out outarg; -- struct fuse_session *se = req->se; -- size_t bufsize = se->bufsize; -- size_t outargsize = sizeof(outarg); -- -- (void) nodeid; -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -- } -- se->conn.proto_major = arg->major; -- se->conn.proto_minor = arg->minor; -- se->conn.capable = 0; -- se->conn.want = 0; -- -- memset(&outarg, 0, sizeof(outarg)); -- outarg.major = FUSE_KERNEL_VERSION; -- outarg.minor = FUSE_KERNEL_MINOR_VERSION; -- -- if (arg->major < 7) { -- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -- arg->major, arg->minor); -- fuse_reply_err(req, EPROTO); -- return; -- } -- -- if (arg->major > 7) { -- /* Wait for a second INIT request with a 7.X version */ -- send_reply_ok(req, 
&outarg, sizeof(outarg)); -- return; -- } -- -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) -- se->conn.max_readahead = arg->max_readahead; -- if (arg->flags & FUSE_ASYNC_READ) -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- if (arg->flags & FUSE_POSIX_LOCKS) -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- if (arg->flags & FUSE_EXPORT_SUPPORT) -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- if (arg->flags & FUSE_DONT_MASK) -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- if (arg->flags & FUSE_FLOCK_LOCKS) -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- if (arg->flags & FUSE_AUTO_INVAL_DATA) -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- if (arg->flags & FUSE_DO_READDIRPLUS) -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- if (arg->flags & FUSE_READDIRPLUS_AUTO) -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- if (arg->flags & FUSE_ASYNC_DIO) -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- if (arg->flags & FUSE_WRITEBACK_CACHE) -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- if (arg->flags & FUSE_PARALLEL_DIROPS) -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- if (arg->flags & FUSE_POSIX_ACL) -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- if (arg->flags & FUSE_HANDLE_KILLPRIV) -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -- + FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -- } -- } else { -- se->conn.max_readahead = 0; -- } -- -- if (se->conn.proto_minor >= 14) { -+ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ 
size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) -- se->conn.capable |= FUSE_CAP_IOCTL_DIR; -- -- /* Default settings for modern filesystems. -- * -- * Most of these capabilities were disabled by default in -- * libfuse2 for backwards compatibility reasons. In libfuse3, -- * we can finally enable them by default (as long as they're -- * supported by the kernel). 
-- */ --#define LL_SET_DEFAULT(cond, cap) \ -- if ((cond) && (se->conn.capable & (cap))) \ -- se->conn.want |= (cap) -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -- FUSE_CAP_POSIX_LOCKS); -- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -- FUSE_CAP_READDIRPLUS_AUTO); -- se->conn.time_gran = 1; -- -- if (bufsize < FUSE_MIN_READ_BUFFER) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -- bufsize); -- bufsize = FUSE_MIN_READ_BUFFER; -- } -- se->bufsize = bufsize; -- -- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -- -- se->got_init = 1; -- if (se->op.init) -- se->op.init(se->userdata, &se->conn); -- -- if (se->conn.want & (~se->conn.capable)) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -- "0x%x that are not supported by kernel, aborting.\n", -- se->conn.want & (~se->conn.capable)); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- -- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -- } -- if (arg->flags & FUSE_MAX_PAGES) { -- outarg.flags |= FUSE_MAX_PAGES; -- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -- } -- -- /* Always enable big writes, this is superseded -- by the max_write option */ -- outarg.flags |= FUSE_BIG_WRITES; -- -- if (se->conn.want & FUSE_CAP_ASYNC_READ) -- outarg.flags |= 
FUSE_ASYNC_READ; -- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -- outarg.flags |= FUSE_POSIX_LOCKS; -- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -- outarg.flags |= FUSE_ATOMIC_O_TRUNC; -- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -- outarg.flags |= FUSE_EXPORT_SUPPORT; -- if (se->conn.want & FUSE_CAP_DONT_MASK) -- outarg.flags |= FUSE_DONT_MASK; -- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -- outarg.flags |= FUSE_FLOCK_LOCKS; -- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -- outarg.flags |= FUSE_AUTO_INVAL_DATA; -- if (se->conn.want & FUSE_CAP_READDIRPLUS) -- outarg.flags |= FUSE_DO_READDIRPLUS; -- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -- outarg.flags |= FUSE_READDIRPLUS_AUTO; -- if (se->conn.want & FUSE_CAP_ASYNC_DIO) -- outarg.flags |= FUSE_ASYNC_DIO; -- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -- outarg.flags |= FUSE_WRITEBACK_CACHE; -- if (se->conn.want & FUSE_CAP_POSIX_ACL) -- outarg.flags |= FUSE_POSIX_ACL; -- outarg.max_readahead = se->conn.max_readahead; -- outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) -- se->conn.max_background = (1 << 16) - 1; -- if (se->conn.congestion_threshold > se->conn.max_background) -- se->conn.congestion_threshold = se->conn.max_background; -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = -- se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -- } -- if (se->conn.proto_minor >= 23) -- outarg.time_gran = se->conn.time_gran; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); 
-- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -- outarg.time_gran); -- } -- if (arg->minor < 5) -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- else if (arg->minor < 23) -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- -- send_reply_ok(req, &outarg, outargsize); -+ } -+ if (se->conn.proto_minor >= 18) { -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ } -+ -+ /* -+ * Default settings for modern filesystems. -+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ } -+ -+ se->got_init = 1; -+ if (se->op.init) { -+ se->op.init(se->userdata, &se->conn); -+ } -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ 
fuse_log(FUSE_LOG_ERR, -+ "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* -+ * Always enable big writes, this is superseded -+ * by the max_write option -+ */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) { -+ outarg.flags |= FUSE_ASYNC_READ; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ } -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ } -+ if (se->conn.want & FUSE_CAP_DONT_MASK) { -+ outarg.flags |= FUSE_DONT_MASK; -+ } -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) { -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ } -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { -+ outarg.flags |= FUSE_ASYNC_DIO; -+ } -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) { -+ outarg.flags |= FUSE_POSIX_ACL; -+ } -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 
1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; -+ } -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -+ } -+ -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) { -+ outarg.time_gran = se->conn.time_gran; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -+ outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -+ } -+ if (arg->minor < 5) { -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ } else if (arg->minor < 23) { -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ } -+ -+ send_reply_ok(req, &outarg, outargsize); - } - - static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_session *se = req->se; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- se->got_destroy = 1; -- if (se->op.destroy) -- se->op.destroy(se->userdata); -+ se->got_destroy = 1; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } - -- send_reply_ok(req, NULL, 0); -+ send_reply_ok(req, NULL, 0); - } - - static void list_del_nreq(struct fuse_notify_req *nreq) - { -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = 
next; -+ next->prev = prev; - } - - static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) -+ struct fuse_notify_req *next) - { -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; - } - - static void list_init_nreq(struct fuse_notify_req *nreq) - { -- nreq->next = nreq; -- nreq->prev = nreq; -+ nreq->next = nreq; -+ nreq->prev = nreq; - } - - static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) -+ const void *inarg, const struct fuse_buf *buf) - { -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req *head; - -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); - -- if (nreq != head) -- nreq->reply(nreq, req, nodeid, inarg, buf); -+ if (nreq != head) { -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+ } - } - - static int send_notify_iov(struct fuse_session *se, int notify_code, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (!se->got_init) -- return -ENOTCONN; -+ if (!se->got_init) { -+ return -ENOTCONN; -+ } - -- out.unique = 0; -- out.error = notify_code; -- iov[0].iov_base = &out; -- iov[0].iov_len 
= sizeof(struct fuse_out_header); -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(se, NULL, iov, count); -+ return fuse_send_msg(se, NULL, iov, count); - } - - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { -- if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -- struct iovec iov[2]; -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; - -- outarg.kh = ph->kh; -+ outarg.kh = ph->kh; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -- } else { -- return 0; -- } -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } - } - - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len) -+ off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -- struct iovec iov[2]; -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -- -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); - } - - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen) -+ const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out 
outarg; -- struct iovec iov[3]; -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); - } - --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen) -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen) - { -- struct fuse_notify_delete_out outarg; -- struct iovec iov[3]; -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = 
(void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -- struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); -- int res; -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; - -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(out); -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -- if (res > 0) -- res = -res; -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) { -+ res = -res; -+ } - -- return res; -+ return res; - } - - struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; -+ struct fuse_notify_req nreq; -+ void *cookie; - }; - --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -- fuse_req_t req, fuse_ino_t ino, -- const void *inarg, -- const struct 
fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, -- arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -+ fuse_ino_t ino, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } - out: -- free(rreq); -+ free(rreq); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) -+ size_t size, off_t offset, void *cookie) - { -- struct fuse_notify_retrieve_out outarg; 
-- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) -- return -ENOMEM; -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) { -+ return -ENOMEM; -+ } - -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); - -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } - -- return err; -+ return err; - } - - void *fuse_req_userdata(fuse_req_t req) - { -- return req->se->userdata; -+ return 
req->se->userdata; - } - - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) - { -- return &req->ctx; -+ return &req->ctx; - } - - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data) -+ void *data) - { -- pthread_mutex_lock(&req->lock); -- pthread_mutex_lock(&req->se->lock); -- req->u.ni.func = func; -- req->u.ni.data = data; -- pthread_mutex_unlock(&req->se->lock); -- if (req->interrupted && func) -- func(req, data); -- pthread_mutex_unlock(&req->lock); -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) { -+ func(req, data); -+ } -+ pthread_mutex_unlock(&req->lock); - } - - int fuse_req_interrupted(fuse_req_t req) - { -- int interrupted; -+ int interrupted; - -- pthread_mutex_lock(&req->se->lock); -- interrupted = req->interrupted; -- pthread_mutex_unlock(&req->se->lock); -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); - -- return interrupted; -+ return interrupted; - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -- const char *name; -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; - } fuse_ll_ops[] = { -- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -- [FUSE_FORGET] = { do_forget, "FORGET" }, -- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -- [FUSE_READLINK] = { do_readlink, "READLINK" }, -- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -- [FUSE_RENAME] = { do_rename, "RENAME" }, -- [FUSE_LINK] = { do_link, "LINK" }, -- [FUSE_OPEN] = { do_open, "OPEN" }, -- [FUSE_READ] = { do_read, "READ" }, -- [FUSE_WRITE] = { do_write, "WRITE" }, -- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, -- [FUSE_RELEASE] = { do_release, "RELEASE" }, -- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -- [FUSE_FLUSH] = { do_flush, "FLUSH" }, -- [FUSE_INIT] = { do_init, "INIT" }, -- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -- [FUSE_READDIR] = { do_readdir, "READDIR" }, -- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -- [FUSE_GETLK] = { do_getlk, "GETLK" }, -- [FUSE_SETLK] = { do_setlk, "SETLK" }, -- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -- [FUSE_ACCESS] = { do_access, "ACCESS" }, -- [FUSE_CREATE] = { do_create, "CREATE" }, -- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -- [FUSE_BMAP] = { do_bmap, "BMAP" }, -- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -- [FUSE_POLL] = { do_poll, "POLL" }, -- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ 
[FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) - - static const char *opname(enum fuse_opcode opcode) - { -- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -- return "???"; -- else -- return fuse_ll_ops[opcode].name; -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { -+ return "???"; -+ } else { -+ return fuse_ll_ops[opcode].name; -+ } - } - - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf) -+ 
const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ fuse_session_process_buf_int(se, buf, NULL); - } - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch) --{ -- struct fuse_in_header *in; -- const void *inarg; -- struct fuse_req *req; -- int err; -- -- in = buf->mem; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -- (unsigned long long) in->unique, -- opname((enum fuse_opcode) in->opcode), in->opcode, -- (unsigned long long) in->nodeid, buf->size, in->pid); -- } -- -- req = fuse_ll_alloc_req(se); -- if (req == NULL) { -- struct fuse_out_header out = { -- .unique = in->unique, -- .error = -ENOMEM, -- }; -- struct iovec iov = { -- .iov_base = &out, -- .iov_len = sizeof(struct fuse_out_header), -- }; -- -- fuse_send_msg(se, ch, &iov, 1); -- return; -- } -- -- req->unique = in->unique; -- req->ctx.uid = in->uid; -- req->ctx.gid = in->gid; -- req->ctx.pid = in->pid; -- req->ch = ch; -- -- err = EIO; -- if (!se->got_init) { -- enum fuse_opcode expected; -- -- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -- if (in->opcode != expected) -- goto reply_err; -- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -- goto reply_err; -- -- err = EACCES; -- /* Implement -o allow_root */ -- if (se->deny_others && in->uid != se->owner && in->uid != 0 && -- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -- in->opcode != FUSE_NOTIFY_REPLY && -- in->opcode != FUSE_READDIRPLUS) -- goto reply_err; -- -- err = ENOSYS; -- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -- goto reply_err; -- if (in->opcode != FUSE_INTERRUPT) { -- struct fuse_req *intr; -- pthread_mutex_lock(&se->lock); -- intr = check_interrupt(se, req); -- list_add_req(req, &se->list); -- pthread_mutex_unlock(&se->lock); -- if (intr) -- fuse_reply_err(intr, EAGAIN); -- } -- -- inarg = (void *) &in[1]; -- if (in->opcode == FUSE_WRITE && se->op.write_buf) -- do_write_buf(req, in->nodeid, inarg, buf); -- else if (in->opcode == FUSE_NOTIFY_REPLY) -- do_notify_reply(req, in->nodeid, inarg, buf); -- else -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -- -- return; -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ int err; -+ -+ in = buf->mem; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -+ "pid: %u\n", -+ (unsigned long long)in->unique, -+ opname((enum fuse_opcode)in->opcode), in->opcode, -+ (unsigned long long)in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, 
&iov, 1); -+ return; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) { -+ goto reply_err; -+ } -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -+ goto reply_err; -+ } -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { -+ goto reply_err; -+ } -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { -+ goto reply_err; -+ } -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) { -+ fuse_reply_err(intr, EAGAIN); -+ } -+ } -+ -+ inarg = (void *)&in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) { -+ do_write_buf(req, in->nodeid, inarg, buf); -+ } else if (in->opcode == FUSE_NOTIFY_REPLY) { -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ } else { -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ } -+ -+ return; - - reply_err: -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - --#define LL_OPTION(n,o,v) \ -- { n, offsetof(struct fuse_session, o), v } -+#define LL_OPTION(n, o, v) \ -+ { \ -+ n, offsetof(struct fuse_session, o), v \ -+ } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), -- LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), -- 
LL_OPTION("allow_root", deny_others, 1), -- FUSE_OPT_END -+ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END - }; - - void fuse_lowlevel_version(void) - { -- printf("using FUSE kernel interface version %i.%i\n", -- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, -+ FUSE_KERNEL_MINOR_VERSION); - } - - void fuse_lowlevel_help(void) - { -- /* These are not all options, but the ones that are -- potentially of interest to an end-user */ -- printf( --" -o allow_root allow access by root\n" --); -+ /* -+ * These are not all options, but the ones that are -+ * potentially of interest to an end-user -+ */ -+ printf(" -o allow_root allow access by root\n"); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- if (se->got_init && !se->got_destroy) { -- if (se->op.destroy) -- se->op.destroy(se->userdata); -- } -- pthread_mutex_destroy(&se->lock); -- free(se->cuse_data); -- if (se->fd != -1) -- close(se->fd); -- free(se); -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) { -+ close(se->fd); -+ } -+ free(se); - } - - - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata) --{ -- struct fuse_session *se; -- -- if (sizeof(struct fuse_lowlevel_ops) < op_size) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -- op_size = sizeof(struct fuse_lowlevel_ops); -- } -- -- if (args->argc == 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -- return NULL; -- } -- -- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -- if (se == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); -- goto out1; -- } -- se->fd = -1; -- se->conn.max_write = UINT_MAX; -- se->conn.max_readahead = UINT_MAX; -- -- /* Parse options */ -- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -- goto out2; -- if(args->argc == 1 && -- args->argv[0][0] == '-') { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -- "will be ignored\n"); -- } else if (args->argc != 1) { -- int i; -- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -- for(i = 1; i < args->argc-1; i++) -- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -- goto out4; -- } -- -- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- -- list_init_req(&se->list); -- list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; -- fuse_mutex_init(&se->lock); -- -- memcpy(&se->op, op, op_size); -- se->owner = getuid(); -- se->userdata = userdata; -- -- return se; -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ struct fuse_session *se; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log( -+ FUSE_LOG_ERR, -+ "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { -+ goto out2; -+ } -+ if (args->argc == 1 && args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if 
(args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for (i = 1; i < args->argc - 1; i++) { -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ } -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ return se; - - out4: -- fuse_opt_free_args(args); -+ fuse_opt_free_args(args); - out2: -- free(se); -+ free(se); - out1: -- return NULL; -+ return NULL; - } - - int fuse_session_mount(struct fuse_session *se, const char *mountpoint) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. -- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) -- close(fd); -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) -- return -1; -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) -- goto error_out; -- -- return 0; -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. 
-+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) { -+ close(fd); -+ } -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) { -+ return -1; -+ } -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) { -+ goto error_out; -+ } -+ -+ return 0; - - error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; - } - - int fuse_session_fd(struct fuse_session *se) - { -- return se->fd; -+ return se->fd; - } - - void fuse_session_unmount(struct fuse_session *se) -@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; - -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); - - retry: -- buf = malloc(bufsize); -- if (buf == NULL) -- return -ENOMEM; -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) -- goto out_free; -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if 
((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) -- goto out_free; -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) -- break; -- -- s = end; -- if (ret < size) -- list[ret] = val; -- ret++; -- } -+ buf = malloc(bufsize); -+ if (buf == NULL) { -+ return -ENOMEM; -+ } -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) { -+ goto out_free; -+ } -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) { -+ goto out_free; -+ } -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) { -+ break; -+ } -+ -+ s = end; -+ if (ret < size) { -+ list[ret] = val; -+ } -+ ret++; -+ } - - out_free: -- free(buf); -- return ret; -+ free(buf); -+ return ret; - } - #else /* linux */ - /* -@@ -2446,23 +2586,25 @@ out_free: - */ - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- (void) req; (void) size; (void) list; -- return -ENOSYS; -+ (void)req; -+ (void)size; -+ (void)list; -+ return -ENOSYS; - } - #endif - - void fuse_session_exit(struct fuse_session *se) - { -- se->exited = 1; -+ se->exited = 1; - } - - void fuse_session_reset(struct fuse_session *se) - { -- se->exited = 0; -- se->error = 0; -+ se->exited = 0; -+ se->error = 0; - } - - int fuse_session_exited(struct fuse_session *se) - { -- return se->exited; -+ return se->exited; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 6b1adfc..adb9054 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1,15 +1,16 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- 
This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOWLEVEL_H_ - #define FUSE_LOWLEVEL_H_ - --/** @file -+/** -+ * @file - * - * Low level API - * -@@ -24,16 +25,16 @@ - - #include "fuse_common.h" - --#include - #include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Miscellaneous definitions * -- * ----------------------------------------------------------- */ -+/* -+ * Miscellaneous definitions -+ */ - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -53,47 +54,54 @@ struct fuse_session; - - /** Directory entry parameters supplied to fuse_reply_entry() */ - struct fuse_entry_param { -- /** Unique inode number -- * -- * In lookup, zero means negative entry (from version 2.5) -- * Returning ENOENT also means negative entry, but by setting zero -- * ino the kernel may cache negative entries for entry_timeout -- * seconds. -- */ -- fuse_ino_t ino; -- -- /** Generation number for this entry. -- * -- * If the file system will be exported over NFS, the -- * ino/generation pairs need to be unique over the file -- * system's lifetime (rather than just the mount time). So if -- * the file system reuses an inode after it has been deleted, -- * it must assign a new, previously unused generation number -- * to the inode at the same time. -- * -- */ -- uint64_t generation; -- -- /** Inode attributes. -- * -- * Even if attr_timeout == 0, attr must be correct. For example, -- * for open(), FUSE uses attr.st_size from lookup() to determine -- * how many bytes to request. If this value is not correct, -- * incorrect data will be returned. 
-- */ -- struct stat attr; -- -- /** Validity timeout (in seconds) for inode attributes. If -- attributes only change as a result of requests that come -- through the kernel, this should be set to a very large -- value. */ -- double attr_timeout; -- -- /** Validity timeout (in seconds) for the name. If directory -- entries are changed/deleted only as a result of requests -- that come through the kernel, this should be set to a very -- large value. */ -- double entry_timeout; -+ /** -+ * Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** -+ * Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** -+ * Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** -+ * Validity timeout (in seconds) for inode attributes. If -+ * attributes only change as a result of requests that come -+ * through the kernel, this should be set to a very large -+ * value. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Validity timeout (in seconds) for the name. If directory -+ * entries are changed/deleted only as a result of requests -+ * that come through the kernel, this should be set to a very -+ * large value. 
-+ */ -+ double entry_timeout; - }; - - /** -@@ -105,38 +113,38 @@ struct fuse_entry_param { - * there is no valid uid/pid/gid that could be reported. - */ - struct fuse_ctx { -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Thread ID of the calling process */ -- pid_t pid; -+ /** Thread ID of the calling process */ -+ pid_t pid; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - struct fuse_forget_data { -- fuse_ino_t ino; -- uint64_t nlookup; -+ fuse_ino_t ino; -+ uint64_t nlookup; - }; - - /* 'to_set' flags in setattr */ --#define FUSE_SET_ATTR_MODE (1 << 0) --#define FUSE_SET_ATTR_UID (1 << 1) --#define FUSE_SET_ATTR_GID (1 << 2) --#define FUSE_SET_ATTR_SIZE (1 << 3) --#define FUSE_SET_ATTR_ATIME (1 << 4) --#define FUSE_SET_ATTR_MTIME (1 << 5) --#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) --#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) --#define FUSE_SET_ATTR_CTIME (1 << 10) -- --/* ----------------------------------------------------------- * -- * Request methods and replies * -- * ----------------------------------------------------------- */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* -+ * Request methods and replies -+ */ - - /** - * Low level filesystem operations -@@ -166,1075 +174,1069 @@ struct fuse_forget_data { - * this file will not be called. 
- */ - struct fuse_lowlevel_ops { -- /** -- * Initialize filesystem -- * -- * This function is called when libfuse establishes -- * communication with the FUSE kernel module. The file system -- * should use this module to inspect and/or modify the -- * connection parameters provided in the `conn` structure. -- * -- * Note that some parameters may be overwritten by options -- * passed to fuse_session_new() which take precedence over the -- * values set in this handler. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*init) (void *userdata, struct fuse_conn_info *conn); -- -- /** -- * Clean up filesystem. -- * -- * Called on filesystem exit. When this method is called, the -- * connection to the kernel may be gone already, so that eg. calls -- * to fuse_lowlevel_notify_* will fail. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*destroy) (void *userdata); -- -- /** -- * Look up a directory entry by name and get its attributes. -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name the name to look up -- */ -- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Forget about an inode -- * -- * This function is called when the kernel removes an inode -- * from its internal caches. -- * -- * The inode's lookup count increases by one for every call to -- * fuse_reply_entry and fuse_reply_create. The nlookup parameter -- * indicates by how much the lookup count should be decreased. -- * -- * Inodes with a non-zero lookup count may receive request from -- * the kernel even after calls to unlink, rmdir or (when -- * overwriting an existing file) rename. 
Filesystems must handle -- * such requests properly and it is recommended to defer removal -- * of the inode until the lookup count reaches zero. Calls to -- * unlink, rmdir or rename will be followed closely by forget -- * unless the file or directory is open, in which case the -- * kernel issues forget only after the release or releasedir -- * calls. -- * -- * Note that if a file system will be exported over NFS the -- * inodes lifetime must extend even beyond forget. See the -- * generation field in struct fuse_entry_param above. -- * -- * On unmount the lookup count for all inodes implicitly drops -- * to zero. It is not guaranteed that the file system will -- * receive corresponding forget messages for the affected -- * inodes. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param ino the inode number -- * @param nlookup the number of lookups to forget -- */ -- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -- -- /** -- * Get file attributes. -- * -- * If writeback caching is enabled, the kernel may have a -- * better idea of a file's length than the FUSE file system -- * (eg if there has been a write that extended the file size, -- * but that has not yet been passed to the filesystem.n -- * -- * In this case, the st_size value provided by the file system -- * will be ignored. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi for future use, currently always NULL -- */ -- void (*getattr) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Set file attributes -- * -- * In the 'attr' argument only members indicated by the 'to_set' -- * bitmask contain valid values. Other members contain undefined -- * values. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits if the file -- * size or owner is being changed. 
-- * -- * If the setattr was invoked from the ftruncate() system call -- * under Linux kernel versions 2.6.15 or later, the fi->fh will -- * contain the value set by the open method or will be undefined -- * if the open method didn't set any value. Otherwise (not -- * ftruncate call, or kernel version earlier than 2.6.15) the fi -- * parameter will be NULL. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param attr the attributes -- * @param to_set bit mask of attributes which should be set -- * @param fi file information, or NULL -- */ -- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int to_set, struct fuse_file_info *fi); -- -- /** -- * Read symbolic link -- * -- * Valid replies: -- * fuse_reply_readlink -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- */ -- void (*readlink) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Create file node -- * -- * Create a regular file, character device, block device, fifo or -- * socket node. 
-- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param rdev the device number (only valid if created file is a device) -- */ -- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, dev_t rdev); -- -- /** -- * Create a directory -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode with which to create the new file -- */ -- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode); -- -- /** -- * Remove a file -- * -- * If the file's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Remove a directory -- * -- * If the directory's inode's lookup count is non-zero, the -- * file system is expected to postpone any removal of the -- * inode until the lookup count reaches zero (see description -- * of the forget function). 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Create a symbolic link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param link the contents of the symbolic link -- * @param parent inode number of the parent directory -- * @param name to create -- */ -- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -- const char *name); -- -- /** Rename a file -- * -- * If the target exists it should be atomically replaced. If -- * the target's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EINVAL, i.e. all -- * future bmap requests will fail with EINVAL without being -- * send to the filesystem process. -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the old parent directory -- * @param name old name -- * @param newparent inode number of the new parent directory -- * @param newname new name -- */ -- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags); -- -- /** -- * Create a hard link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the old inode number -- * @param newparent inode number of the new parent directory -- * @param newname new name to create -- */ -- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -- const char *newname); -- -- /** -- * Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -- * by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount -- * option is given, this check is already done by the -- * kernel before calling open() and may thus be omitted by -- * the filesystem. -- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. -- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. 
The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open and release will also succeed without being -- * sent to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*open) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read data -- * -- * Read should send exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the file -- * has been opened in 'direct_io' mode, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_iov -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size number of bytes to read -- * @param off offset to read from -- * @param fi file information -- */ -- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Write data -- * -- * Write should return exactly the number of bytes requested -- * except on error. An exception to this is when the file has -- * been opened in 'direct_io' mode, in which case the return value -- * of the write system call will reflect the return value of this -- * operation. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param buf data to write -- * @param size number of bytes to write -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -- -- /** -- * Flush method -- * -- * This is called on each close() of the opened file. -- * -- * Since file descriptors can be duplicated (dup, dup2, fork), for -- * one open call there may be many flush calls. -- * -- * Filesystems shouldn't assume that flush will always be called -- * after some writes, or that if will be called at all. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * NOTE: the name of the method is misleading, since (unlike -- * fsync) the filesystem is not forced to flush pending writes. 
-- * One reason to flush data is if the filesystem wants to return -- * write errors during close. However, such use is non-portable -- * because POSIX does not require [close] to wait for delayed I/O to -- * complete. -- * -- * If the filesystem supports file locking operations (setlk, -- * getlk) it should remove all locks belonging to 'fi->owner'. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to flush() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- void (*flush) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open call there will be exactly one release call (unless -- * the filesystem is force-unmounted). -- * -- * The filesystem may reply with an error, but error values are -- * not returned to close() or munmap() which triggered the -- * release. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * fi->flags will contain the same flags as for open. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*release) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. 
-- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsync() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Open a directory -- * -- * Filesystem may store an arbitrary file handle (pointer, index, -- * etc) in fi->fh, and use this in other all other directory -- * stream operations (readdir, releasedir, fsyncdir). -- * -- * If this request is answered with an error code of ENOSYS and -- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -- * this is treated as success and future calls to opendir and -- * releasedir will also succeed without being sent to the filesystem -- * process. In addition, the kernel will cache readdir results -- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*opendir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read directory -- * -- * Send a buffer filled using fuse_add_direntry(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * Returning a directory entry from readdir() does not affect -- * its lookup count. -- * -- * If off_t is non-zero, then it will correspond to one of the off_t -- * values that was previously returned by readdir() for the same -- * directory handle. 
In this case, readdir() should skip over entries -- * coming before the position defined by the off_t value. If entries -- * are added or removed while the directory handle is open, they filesystem -- * may still include the entries that have been removed, and may not -- * report the entries that have been created. However, addition or -- * removal of entries must never cause readdir() to skip over unrelated -- * entries or to report them more than once. This means -- * that off_t can not be a simple index that enumerates the entries -- * that have been returned but must contain sufficient information to -- * uniquely determine the next directory entry to return even when the -- * set of entries is changing. -- * -- * The function does not have to report the '.' and '..' -- * entries, but is allowed to do so. Note that, if readdir does -- * not return '.' or '..', they will not be implicitly returned, -- * and this behavior is observable by the caller. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Release an open directory -- * -- * For every opendir call there will be exactly one releasedir -- * call (unless the filesystem is force-unmounted). -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the directory -- * contents should be flushed, not the meta data. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsyncdir() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Get file system statistics -- * -- * Valid replies: -- * fuse_reply_statfs -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number, zero means "undefined" -- */ -- void (*statfs) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Set an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future setxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- */ -- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags); -- -- /** -- * Get an extended attribute -- * -- * If size is zero, the size of the value should be sent with -- * fuse_reply_xattr. 
-- * -- * If the size is non-zero, and the value fits in the buffer, the -- * value should be sent with fuse_reply_buf. -- * -- * If the size is too small for the value, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future getxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- * @param size maximum size of the value to send -- */ -- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size); -- -- /** -- * List extended attribute names -- * -- * If size is zero, the total size of the attribute list should be -- * sent with fuse_reply_xattr. -- * -- * If the size is non-zero, and the null character separated -- * attribute list fits in the buffer, the list should be sent with -- * fuse_reply_buf. -- * -- * If the size is too small for the list, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future listxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum size of the list to send -- */ -- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -- -- /** -- * Remove an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -- * future removexattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- */ -- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() and chdir() system -- * calls. If the 'default_permissions' mount option is given, -- * this method is not called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent success, i.e. this and all future access() -- * requests will succeed without being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param mask requested access mode -- */ -- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * See the description of the open handler for more -- * information. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. -- * -- * If this request is answered with an error code of ENOSYS, the handler -- * is treated as not implemented (i.e., for this and future requests the -- * mknod() and open() handlers will be called instead). 
-- * -- * Valid replies: -- * fuse_reply_create -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param fi file information -- */ -- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi); -- -- /** -- * Test for a POSIX file lock -- * -- * Valid replies: -- * fuse_reply_lock -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to test -- */ -- void (*getlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, struct flock *lock); -- -- /** -- * Acquire, modify or release a POSIX file lock -- * -- * For POSIX threads (NPTL) there's a 1-1 relation between pid and -- * owner, but otherwise this is not always the case. For checking -- * lock ownership, 'fi->owner' must be used. The l_pid field in -- * 'struct flock' should only be used to fill in this field in -- * getlk(). -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to set -- * @param sleep locking operation may sleep -- */ -- void (*setlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, -- struct flock *lock, int sleep); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. 
all future bmap() requests will -- * fail with the same error code without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_bmap -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param blocksize unit of block index -- * @param idx block index within file -- */ -- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -- uint64_t idx); -- -- /** -- * Ioctl -- * -- * Note: For unrestricted ioctls (not allowed for FUSE -- * servers), data in and out areas can be discovered by giving -- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -- * restricted ioctls, kernel prepares in/out data area -- * according to the information encoded in cmd. -- * -- * Valid replies: -- * fuse_reply_ioctl_retry -- * fuse_reply_ioctl -- * fuse_reply_ioctl_iov -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param cmd ioctl command -- * @param arg ioctl argument -- * @param fi file information -- * @param flags for FUSE_IOCTL_* flags -- * @param in_buf data fetched from the caller -- * @param in_bufsz number of fetched bytes -- * @param out_bufsz maximum size of output data -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned flags, -- const void *in_buf, size_t in_bufsz, size_t out_bufsz); -- -- /** -- * Poll for IO readiness -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_lowlevel_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. 
-- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as success (with a kernel-defined default poll-mask) and -- * future calls to pull() will succeed the same way without being send -- * to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_poll -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param ph poll handle to be used for notification -- */ -- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- struct fuse_pollhandle *ph); -- -- /** -- * Write data made available in a buffer -- * -- * This is a more generic version of the ->write() method. If -- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -- * kernel supports splicing from the fuse device, then the -- * data will be made available in pipe for supporting zero -- * copy data transfer. -- * -- * buf->count is guaranteed to be one (and thus buf->idx is -- * always zero). The write_buf handler must ensure that -- * bufv->off is correctly updated (reflecting the number of -- * bytes read from bufv->buf[0]). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. 
-- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param bufv buffer containing the data -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *bufv, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** -- * Forget about multiple inodes -- * -- * See description of the forget function for more -- * information. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- */ -- void (*forget_multi) (fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets); -- -- /** -- * Acquire, modify or release a BSD file lock -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param op the locking operation, see flock(2) -- */ -- void (*flock) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, int op); -- -- /** -- * Allocate requested space. If this function returns success then -- * subsequent writes to the specified range shall not fail due to the lack -- * of free space on the file system storage media. 
-- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future fallocate() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param offset starting point for allocated region -- * @param length size of allocated region -- * @param mode determines the operation to be performed on the given range, -- * see fallocate(2) -- */ -- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -- -- /** -- * Read directory with attributes -- * -- * Send a buffer filled using fuse_add_direntry_plus(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * In contrast to readdir() (which does not affect the lookup counts), -- * the lookup count of every entry returned by readdirplus(), except "." -- * and "..", is incremented by one. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. 
-- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future copy_file_range() requests will fail with EOPNOTSUPP without -- * being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino_in the inode number or the source file -- * @param off_in starting point from were the data should be read -- * @param fi_in file information of the source file -- * @param ino_out the inode number or the destination file -- * @param off_out starting point where the data should be written -- * @param fi_out file information of the destination file -- * @param len maximum size of the data to copy -- * @param flags passed along with the copy_file_range() syscall -- */ -- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -- off_t off_in, struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags); -- -- /** -- * Find next data or hole after the specified offset -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. all future lseek() requests will -- * fail with the same error code without being send to the filesystem -- * process. 
-- * -- * Valid replies: -- * fuse_reply_lseek -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param off offset to start search from -- * @param whence either SEEK_DATA or SEEK_HOLE -- * @param fi file information -- */ -- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi); -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init)(void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy)(void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. -+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** -+ * Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that if will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes. 
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, they filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, -+ struct fuse_file_info *fi, unsigned flags, const void *in_buf, -+ size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to pull() will succeed the same way without being send -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi)(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number or the source file -+ * @param off_in starting point from were the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number or the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. 
-+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); - }; - - /** -@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *fi); -+ const struct fuse_file_info *fi); - - /** - * Reply with attributes -@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - * - * @param req request handle - * @param attr the attributes -- * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout); -+ double attr_timeout); - - /** - * Reply with the contents of a symbolic link -@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - - /** - * Reply with data vector -@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); - */ - int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - --/* ----------------------------------------------------------- * -- * Filling a buffer in readdir * -- * ----------------------------------------------------------- */ -+/* -+ * Filling a buffer in readdir -+ */ - - /** - * Add a directory entry to 
the buffer -@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - * @return the space needed for the entry - */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, -- off_t off); -+ const char *name, const struct stat *stbuf, off_t off); - - /** - * Add a directory entry to the buffer with the attributes -@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - * @return the space needed for the entry - */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off); -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); - - /** - * Reply to ask for data fetch and output buffer preparation. ioctl -@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - * @param out_count number of entries in out_iov - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count); -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count); - - /** - * Reply to finish ioctl -@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); - * @param count the size of vector - */ - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count); -+ int count); - - /** - * Reply with poll result event mask -@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); - */ - int fuse_reply_lseek(fuse_req_t req, off_t off); - --/* ----------------------------------------------------------- * -- * Notification * -- * ----------------------------------------------------------- */ -+/* -+ * Notification -+ */ - - /** - * Notify 
IO readiness event -@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len); -+ off_t off, off_t len); - - /** - * Notify to invalidate parent attributes and the dentry matching -@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen); -+ const char *name, size_t namelen); - - /** - * This function behaves like fuse_lowlevel_notify_inval_entry() with -@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - * @param namelen strlen() of file name - * @return zero for success, -errno for failure - */ --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen); -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen); - - /** - * Store data to the kernel buffers -@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); - /** - * Retrieve data from the kernel buffers - * -@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -+ size_t size, off_t offset, void 
*cookie); - - --/* ----------------------------------------------------------- * -- * Utility functions * -- * ----------------------------------------------------------- */ -+/* -+ * Utility functions -+ */ - - /** - * Get the userdata from the request -@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); - * @param data user data passed to the callback function - */ - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data); -+ void *data); - - /** - * Check if a request has already been interrupted -@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - int fuse_req_interrupted(fuse_req_t req); - - --/* ----------------------------------------------------------- * -- * Inquiry functions * -- * ----------------------------------------------------------- */ -+/* -+ * Inquiry functions -+ */ - - /** - * Print low-level version information to stdout. -@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); - */ - void fuse_cmdline_help(void); - --/* ----------------------------------------------------------- * -- * Filesystem setup & teardown * -- * ----------------------------------------------------------- */ -+/* -+ * Filesystem setup & teardown -+ */ - - struct fuse_cmdline_opts { -- int foreground; -- int debug; -- int nodefault_subtype; -- char *mountpoint; -- int show_version; -- int show_help; -- unsigned int max_idle_threads; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ unsigned int max_idle_threads; - }; - - /** -@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { - * @param opts output argument for parsed options - * @return 0 on success, -1 on failure - */ --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts); -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); - - /** - * Create a low level session. 
-@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, - * @return the fuse session on success, NULL on failure - **/ - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata); -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); - - /** - * Mount a FUSE file system. -@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); - */ - void fuse_session_destroy(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Custom event loop support * -- * ----------------------------------------------------------- */ -+/* -+ * Custom event loop support -+ */ - - /** - * Return file descriptor for communication with kernel. -@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); - * @param buf the fuse_buf containing the request - */ - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf); -+ const struct fuse_buf *buf); - - /** - * Read a raw request from the kernel into the supplied buffer. -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index 2f6663e..f252baa 100644 ---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -1,18 +1,18 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include - - /* -- Versioned symbols cannot be used in some cases because it -- - confuse the dynamic linker in uClibc -- - not supported on MacOSX (in MachO binary format) --*/ -+ * Versioned symbols cannot be used in some cases because it -+ * - confuse the dynamic linker in uClibc -+ * - not supported on MacOSX (in MachO binary format) -+ */ - #if (!defined(__UCLIBC__) && !defined(__APPLE__)) - #define FUSE_SYMVER(x) __asm__(x) - #else -@@ -25,11 +25,11 @@ - /* Is this hack still needed? */ - static inline void fuse_mutex_init(pthread_mutex_t *mut) - { -- pthread_mutexattr_t attr; -- pthread_mutexattr_init(&attr); -- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -- pthread_mutex_init(mut, &attr); -- pthread_mutexattr_destroy(&attr); -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); - } - #endif - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index 93066b9..edd36f4 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -1,423 +1,450 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of option parsing routines (dealing with `struct -- fuse_args`). -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of option parsing routines (dealing with `struct -+ * fuse_args`). -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - -+#include "fuse_opt.h" - #include "config.h" - #include "fuse_i.h" --#include "fuse_opt.h" - #include "fuse_misc.h" - -+#include - #include - #include - #include --#include - - struct fuse_opt_context { -- void *data; -- const struct fuse_opt *opt; -- fuse_opt_proc_t proc; -- int argctr; -- int argc; -- char **argv; -- struct fuse_args outargs; -- char *opts; -- int nonopt; -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; - }; - - void fuse_opt_free_args(struct fuse_args *args) - { -- if (args) { -- if (args->argv && args->allocated) { -- int i; -- for (i = 0; i < args->argc; i++) -- free(args->argv[i]); -- free(args->argv); -- } -- args->argc = 0; -- args->argv = NULL; -- args->allocated = 0; -- } -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) { -+ free(args->argv[i]); -+ } -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } - } - - static int alloc_failed(void) - { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; - } - - int fuse_opt_add_arg(struct fuse_args *args, const char *arg) - { -- char **newargv; -- char *newarg; -- -- assert(!args->argv || args->allocated); -- -- newarg = strdup(arg); -- if (!newarg) -- return alloc_failed(); -- -- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -- if (!newargv) { -- free(newarg); -- return alloc_failed(); -- } -- -- args->argv = newargv; -- args->allocated = 1; -- args->argv[args->argc++] = newarg; -- args->argv[args->argc] = NULL; -- return 0; -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; - } - - static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -- const char *arg) -+ const char *arg) - { -- assert(pos <= args->argc); -- if (fuse_opt_add_arg(args, arg) == -1) -- return -1; -- -- if (pos != args->argc - 1) { -- char *newarg = args->argv[args->argc - 1]; -- memmove(&args->argv[pos + 1], &args->argv[pos], -- sizeof(char *) * (args->argc - pos - 1)); -- args->argv[pos] = newarg; -- } -- return 0; -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) { -+ return -1; -+ } -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; - } - - int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) - { -- return fuse_opt_insert_arg_common(args, pos, arg); -+ return fuse_opt_insert_arg_common(args, pos, arg); - } - - static int next_arg(struct fuse_opt_context *ctx, const char *opt) - { -- if (ctx->argctr + 1 >= ctx->argc) { -- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -- return -1; -- } -- ctx->argctr++; -- return 0; -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; - } - - static int add_arg(struct fuse_opt_context *ctx, const char *arg) - { -- return fuse_opt_add_arg(&ctx->outargs, arg); -+ return fuse_opt_add_arg(&ctx->outargs, arg); - } - - static int add_opt_common(char **opts, const char *opt, int esc) - { -- unsigned oldlen = *opts ? 
strlen(*opts) : 0; -- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -- -- if (!d) -- return alloc_failed(); -- -- *opts = d; -- if (oldlen) { -- d += oldlen; -- *d++ = ','; -- } -- -- for (; *opt; opt++) { -- if (esc && (*opt == ',' || *opt == '\\')) -- *d++ = '\\'; -- *d++ = *opt; -- } -- *d = '\0'; -- -- return 0; -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) { -+ return alloc_failed(); -+ } -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) { -+ *d++ = '\\'; -+ } -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; - } - - int fuse_opt_add_opt(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 0); -+ return add_opt_common(opts, opt, 0); - } - - int fuse_opt_add_opt_escaped(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 1); -+ return add_opt_common(opts, opt, 1); - } - - static int add_opt(struct fuse_opt_context *ctx, const char *opt) - { -- return add_opt_common(&ctx->opts, opt, 1); -+ return add_opt_common(&ctx->opts, opt, 1); - } - - static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -- int iso) -+ int iso) - { -- if (key == FUSE_OPT_KEY_DISCARD) -- return 0; -- -- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -- if (res == -1 || !res) -- return res; -- } -- if (iso) -- return add_opt(ctx, arg); -- else -- return add_arg(ctx, arg); -+ if (key == FUSE_OPT_KEY_DISCARD) { -+ return 0; -+ } -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) { -+ return res; -+ } -+ } -+ if (iso) { -+ return add_opt(ctx, arg); -+ } else { -+ return add_arg(ctx, arg); -+ } - } - - static int match_template(const char *t, const char *arg, unsigned *sepp) - { -- int arglen = strlen(arg); -- 
const char *sep = strchr(t, '='); -- sep = sep ? sep : strchr(t, ' '); -- if (sep && (!sep[1] || sep[1] == '%')) { -- int tlen = sep - t; -- if (sep[0] == '=') -- tlen ++; -- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -- *sepp = sep - t; -- return 1; -- } -- } -- if (strcmp(t, arg) == 0) { -- *sepp = 0; -- return 1; -- } -- return 0; -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') { -+ tlen++; -+ } -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; - } - - static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -- const char *arg, unsigned *sepp) -+ const char *arg, unsigned *sepp) - { -- for (; opt && opt->templ; opt++) -- if (match_template(opt->templ, arg, sepp)) -- return opt; -- return NULL; -+ for (; opt && opt->templ; opt++) { -+ if (match_template(opt->templ, arg, sepp)) { -+ return opt; -+ } -+ } -+ return NULL; - } - - int fuse_opt_match(const struct fuse_opt *opts, const char *opt) - { -- unsigned dummy; -- return find_opt(opts, opt, &dummy) ? 1 : 0; -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 
1 : 0; - } - - static int process_opt_param(void *var, const char *format, const char *param, -- const char *arg) -+ const char *arg) - { -- assert(format[0] == '%'); -- if (format[1] == 's') { -- char **s = var; -- char *copy = strdup(param); -- if (!copy) -- return alloc_failed(); -- -- free(*s); -- *s = copy; -- } else { -- if (sscanf(param, format, var) != 1) { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -- return -1; -- } -- } -- return 0; -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) { -+ return alloc_failed(); -+ } -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", -+ arg); -+ return -1; -+ } -+ } -+ return 0; - } - --static int process_opt(struct fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, -+ unsigned sep, const char *arg, int iso) - { -- if (opt->offset == -1U) { -- if (call_proc(ctx, arg, opt->value, iso) == -1) -- return -1; -- } else { -- void *var = (char *)ctx->data + opt->offset; -- if (sep && opt->templ[sep + 1]) { -- const char *param = arg + sep; -- if (opt->templ[sep] == '=') -- param ++; -- if (process_opt_param(var, opt->templ + sep + 1, -- param, arg) == -1) -- return -1; -- } else -- *(int *)var = opt->value; -- } -- return 0; -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) { -+ return -1; -+ } -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') { -+ param++; -+ } -+ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == -+ -1) { -+ return -1; -+ } -+ } else { -+ *(int *)var = opt->value; -+ } -+ } -+ return 0; - } - - static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) - { -- int res; -- char *newarg; -- char *param; -- -- if (next_arg(ctx, arg) == -1) -- return -1; -- -- param = ctx->argv[ctx->argctr]; -- newarg = malloc(sep + strlen(param) + 1); -- if (!newarg) -- return alloc_failed(); -- -- memcpy(newarg, arg, sep); -- strcpy(newarg + sep, param); -- res = process_opt(ctx, opt, sep, newarg, iso); -- free(newarg); -- -- return res; -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 1); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; - } - - static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) - { -- unsigned sep; -- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -- if (opt) { -- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -- int res; -- if (sep && opt->templ[sep] == ' ' && !arg[sep]) -- res = process_opt_sep_arg(ctx, opt, sep, arg, -- iso); -- else -- res = process_opt(ctx, opt, sep, arg, iso); -- if (res == -1) -- return -1; -- } -- return 0; -- } else -- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { -+ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); -+ } else { -+ res = process_opt(ctx, opt, sep, arg, iso); -+ } -+ if (res == -1) { -+ return -1; -+ } -+ } -+ return 0; -+ } else { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ } - } - - static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) - { -- char *s = opts; -- char *d = s; -- int end = 0; -- -- while (!end) { -- if (*s == '\0') -- end = 1; -- if (*s == ',' || end) { -- int res; -- -- *d = '\0'; -- res = process_gopt(ctx, opts, 1); -- if (res == -1) -- return -1; -- d = opts; -- } else { -- if (s[0] == '\\' && s[1] != '\0') { -- s++; -- if (s[0] >= '0' && s[0] <= '3' && -- s[1] >= '0' && s[1] <= '7' && -- s[2] >= '0' && s[2] <= '7') { -- *d++ = (s[0] - '0') * 0100 + -- (s[1] - '0') * 0010 + -- (s[2] - '0'); -- s += 2; -- } else { -- *d++ = *s; -- } -- } else { -- *d++ = *s; -- } -- } -- s++; -- } -- -- return 0; -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') { -+ end = 1; -+ } -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) { -+ return -1; -+ } -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; - } - - static int process_option_group(struct fuse_opt_context *ctx, const char *opts) - { -- int res; -- char *copy = strdup(opts); -- -- if (!copy) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } -- res = process_real_option_group(ctx, copy); -- free(copy); -- return res; -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; - } - - static int process_one(struct fuse_opt_context *ctx, const char *arg) - { -- if (ctx->nonopt || arg[0] != '-') -- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -- else if (arg[1] == 'o') { -- if (arg[2]) -- return process_option_group(ctx, arg + 2); -- else { -- if 
(next_arg(ctx, arg) == -1) -- return -1; -- -- return process_option_group(ctx, -- ctx->argv[ctx->argctr]); -- } -- } else if (arg[1] == '-' && !arg[2]) { -- if (add_arg(ctx, arg) == -1) -- return -1; -- ctx->nonopt = ctx->outargs.argc; -- return 0; -- } else -- return process_gopt(ctx, arg, 0); -+ if (ctx->nonopt || arg[0] != '-') { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ } else if (arg[1] == 'o') { -+ if (arg[2]) { -+ return process_option_group(ctx, arg + 2); -+ } else { -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ return process_option_group(ctx, ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else { -+ return process_gopt(ctx, arg, 0); -+ } - } - - static int opt_parse(struct fuse_opt_context *ctx) - { -- if (ctx->argc) { -- if (add_arg(ctx, ctx->argv[0]) == -1) -- return -1; -- } -- -- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -- return -1; -- -- if (ctx->opts) { -- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -- return -1; -- } -- -- /* If option separator ("--") is the last argument, remove it */ -- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -- free(ctx->outargs.argv[ctx->outargs.argc - 1]); -- ctx->outargs.argv[--ctx->outargs.argc] = NULL; -- } -- -- return 0; -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) { -+ return -1; -+ } -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { -+ return -1; -+ } -+ } -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { -+ return -1; -+ } -+ } -+ -+ /* If 
option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; - } - - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc) -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) - { -- int res; -- struct fuse_opt_context ctx = { -- .data = data, -- .opt = opts, -- .proc = proc, -- }; -- -- if (!args || !args->argv || !args->argc) -- return 0; -- -- ctx.argc = args->argc; -- ctx.argv = args->argv; -- -- res = opt_parse(&ctx); -- if (res != -1) { -- struct fuse_args tmp = *args; -- *args = ctx.outargs; -- ctx.outargs = tmp; -- } -- free(ctx.opts); -- fuse_opt_free_args(&ctx.outargs); -- return res; -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) { -+ return 0; -+ } -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; - } -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index 6910255..8f59b4d 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - #ifndef FUSE_OPT_H_ - #define FUSE_OPT_H_ -@@ -37,7 +37,7 @@ - * - * - 'offsetof(struct foo, member)' actions i) and iii) - * -- * - -1 action ii) -+ * - -1 action ii) - * - * The 'offsetof()' macro is defined in the header. - * -@@ -48,7 +48,7 @@ - * - * The types of templates are: - * -- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only - * themselves. Invalid values are "--" and anything beginning - * with "-o" - * -@@ -71,58 +71,67 @@ - * freed. - */ - struct fuse_opt { -- /** Matching template and optional parameter formatting */ -- const char *templ; -+ /** Matching template and optional parameter formatting */ -+ const char *templ; - -- /** -- * Offset of variable within 'data' parameter of fuse_opt_parse() -- * or -1 -- */ -- unsigned long offset; -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; - -- /** -- * Value to set the variable to, or to be passed as 'key' to the -- * processing function. Ignored if template has a format -- */ -- int value; -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; - }; - - /** -- * Key option. In case of a match, the processing function will be -+ * Key option. In case of a match, the processing function will be - * called with the specified key. - */ --#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+#define FUSE_OPT_KEY(templ, key) \ -+ { \ -+ templ, -1U, key \ -+ } - - /** -- * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * Last option. 
An array of 'struct fuse_opt' must end with a NULL - * template value - */ --#define FUSE_OPT_END { NULL, 0, 0 } -+#define FUSE_OPT_END \ -+ { \ -+ NULL, 0, 0 \ -+ } - - /** - * Argument list - */ - struct fuse_args { -- /** Argument count */ -- int argc; -+ /** Argument count */ -+ int argc; - -- /** Argument vector. NULL terminated */ -- char **argv; -+ /** Argument vector. NULL terminated */ -+ char **argv; - -- /** Is 'argv' allocated? */ -- int allocated; -+ /** Is 'argv' allocated? */ -+ int allocated; - }; - - /** - * Initializer for 'struct fuse_args' - */ --#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+#define FUSE_ARGS_INIT(argc, argv) \ -+ { \ -+ argc, argv, 0 \ -+ } - - /** - * Key value passed to the processing function if an option did not - * match any template - */ --#define FUSE_OPT_KEY_OPT -1 -+#define FUSE_OPT_KEY_OPT -1 - - /** - * Key value passed to the processing function for all non-options -@@ -130,7 +139,7 @@ struct fuse_args { - * Non-options are the arguments beginning with a character other than - * '-' or all arguments after the special '--' option - */ --#define FUSE_OPT_KEY_NONOPT -2 -+#define FUSE_OPT_KEY_NONOPT -2 - - /** - * Special key value for options to keep -@@ -174,7 +183,7 @@ struct fuse_args { - * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept - */ - typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -- struct fuse_args *outargs); -+ struct fuse_args *outargs); - - /** - * Option parsing function -@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, - * @return -1 on error, 0 on success - */ - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); - - /** - * Add an option to a comma separated option list -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 4271947..19d6791 100644 ---- 
a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -1,91 +1,95 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Utility functions for setting signal handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Utility functions for setting signal handlers. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "config.h" --#include "fuse_lowlevel.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - --#include --#include - #include -+#include - #include -+#include - - static struct fuse_session *fuse_instance; - - static void exit_handler(int sig) - { -- if (fuse_instance) { -- fuse_session_exit(fuse_instance); -- if(sig <= 0) { -- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -- abort(); -- } -- fuse_instance->error = sig; -- } -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if (sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } - } - - static void do_nothing(int sig) - { -- (void) sig; -+ (void)sig; - } - - static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - { -- struct sigaction sa; -- struct sigaction old_sa; -+ struct sigaction sa; -+ struct sigaction old_sa; - -- memset(&sa, 0, sizeof(struct sigaction)); -- sa.sa_handler = remove ? SIG_DFL : handler; -- sigemptyset(&(sa.sa_mask)); -- sa.sa_flags = 0; -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? 
SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; - -- if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -- return -1; -- } -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } - -- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -- sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -- return -1; -- } -- return 0; -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; - } - - int fuse_set_signal_handlers(struct fuse_session *se) - { -- /* If we used SIG_IGN instead of the do_nothing function, -- then we would be unable to tell if we set SIG_IGN (and -- thus should reset to SIG_DFL in fuse_remove_signal_handlers) -- or if it was already set to SIG_IGN (and should be left -- untouched. */ -- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -- return -1; -+ /* -+ * If we used SIG_IGN instead of the do_nothing function, -+ * then we would be unable to tell if we set SIG_IGN (and -+ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ * or if it was already set to SIG_IGN (and should be left -+ * untouched. 
-+ */ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { -+ return -1; -+ } - -- fuse_instance = se; -- return 0; -+ fuse_instance = se; -+ return 0; - } - - void fuse_remove_signal_handlers(struct fuse_session *se) - { -- if (fuse_instance != se) -- fuse_log(FUSE_LOG_ERR, -- "fuse: fuse_remove_signal_handlers: unknown session\n"); -- else -- fuse_instance = NULL; -+ if (fuse_instance != se) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ } else { -+ fuse_instance = NULL; -+ } - -- set_one_signal_handler(SIGHUP, exit_handler, 1); -- set_one_signal_handler(SIGINT, exit_handler, 1); -- set_one_signal_handler(SIGTERM, exit_handler, 1); -- set_one_signal_handler(SIGPIPE, do_nothing, 1); -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); - } -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5a2e64c..5711dd2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -1,297 +1,309 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Helper functions to create (simple) standalone programs. With the -+ * aid of these functions it should be possible to create full FUSE -+ * file system by implementing nothing but the request handlers. - -- Helper functions to create (simple) standalone programs. With the -- aid of these functions it should be possible to create full FUSE -- file system by implementing nothing but the request handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. 
--*/ -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #include "config.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_lowlevel.h" - #include "mount_util.h" - -+#include -+#include -+#include - #include - #include --#include --#include - #include --#include --#include - #include -+#include - --#define FUSE_HELPER_OPT(t, p) \ -- { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+#define FUSE_HELPER_OPT(t, p) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), 1 \ -+ } - - static const struct fuse_opt fuse_helper_opts[] = { -- FUSE_HELPER_OPT("-h", show_help), -- FUSE_HELPER_OPT("--help", show_help), -- FUSE_HELPER_OPT("-V", show_version), -- FUSE_HELPER_OPT("--version", show_version), -- FUSE_HELPER_OPT("-d", debug), -- FUSE_HELPER_OPT("debug", debug), -- FUSE_HELPER_OPT("-d", foreground), -- FUSE_HELPER_OPT("debug", foreground), -- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("fsname=", nodefault_subtype), -- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("subtype=", nodefault_subtype), -- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -- FUSE_OPT_END -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END - }; - - struct fuse_conn_info_opts { -- int atomic_o_trunc; -- int no_remote_posix_lock; -- int no_remote_flock; -- int splice_write; -- int splice_move; -- int splice_read; -- int no_splice_write; -- int no_splice_move; -- int no_splice_read; -- int auto_inval_data; -- int no_auto_inval_data; -- int no_readdirplus; -- int no_readdirplus_auto; -- int async_dio; -- int no_async_dio; -- int writeback_cache; -- int no_writeback_cache; -- int async_read; -- int sync_read; -- unsigned max_write; -- unsigned max_readahead; -- unsigned max_background; -- unsigned congestion_threshold; -- unsigned time_gran; -- int set_max_write; -- int set_max_readahead; -- int set_max_background; -- int set_congestion_threshold; -- int set_time_gran; -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; - }; - --#define CONN_OPTION(t, p, v) \ -- { t, offsetof(struct fuse_conn_info_opts, p), v } -+#define CONN_OPTION(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_conn_info_opts, p), v \ -+ } - static const struct fuse_opt conn_info_opt_spec[] = { -- CONN_OPTION("max_write=%u", max_write, 0), -- CONN_OPTION("max_write=", set_max_write, 1), -- CONN_OPTION("max_readahead=%u", max_readahead, 0), -- CONN_OPTION("max_readahead=", set_max_readahead, 1), -- 
CONN_OPTION("max_background=%u", max_background, 0), -- CONN_OPTION("max_background=", set_max_background, 1), -- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -- CONN_OPTION("sync_read", sync_read, 1), -- CONN_OPTION("async_read", async_read, 1), -- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -- CONN_OPTION("no_remote_lock", no_remote_flock, 1), -- CONN_OPTION("no_remote_flock", no_remote_flock, 1), -- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -- CONN_OPTION("splice_write", splice_write, 1), -- CONN_OPTION("no_splice_write", no_splice_write, 1), -- CONN_OPTION("splice_move", splice_move, 1), -- CONN_OPTION("no_splice_move", no_splice_move, 1), -- CONN_OPTION("splice_read", splice_read, 1), -- CONN_OPTION("no_splice_read", no_splice_read, 1), -- CONN_OPTION("auto_inval_data", auto_inval_data, 1), -- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -- CONN_OPTION("readdirplus=no", no_readdirplus, 1), -- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -- CONN_OPTION("async_dio", async_dio, 1), -- CONN_OPTION("no_async_dio", no_async_dio, 1), -- CONN_OPTION("writeback_cache", writeback_cache, 1), -- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -- CONN_OPTION("time_gran=%u", time_gran, 0), -- CONN_OPTION("time_gran=", set_time_gran, 1), -- FUSE_OPT_END -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END - }; - - - void fuse_cmdline_help(void) - { -- printf(" -h --help print help\n" -- " -V --version print version\n" -- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf( -+ " -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o 
max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -- struct fuse_args *outargs) -+ struct fuse_args *outargs) - { -- (void) outargs; -- struct fuse_cmdline_opts *opts = data; -- -- switch (key) { -- case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: bad mount point `%s': %s\n", -- arg, strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -- -- default: -- /* Pass through unknown options */ -- return 1; -- } -+ (void)outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -+ strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } - } - --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts) -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - { -- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - -- opts->max_idle_threads = 10; -+ opts->max_idle_threads = 10; - -- if (fuse_opt_parse(args, opts, fuse_helper_opts, -- fuse_helper_opt_proc) == -1) -- return -1; -+ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == -+ -1) { -+ return -1; -+ } - -- return 0; -+ return 0; - } - - - int fuse_daemonize(int foreground) - { -- if (!foreground) { -- int nullfd; -- int waiter[2]; -- char completed; -- -- if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -- return -1; -- } -- -- /* -- * demonize current process by forking it and killing the -- * parent. This makes current process as a child of 'init'. -- */ -- switch(fork()) { -- case -1: -- perror("fuse_daemonize: fork"); -- return -1; -- case 0: -- break; -- default: -- (void) read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -- } -- -- if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -- return -1; -- } -- -- (void) chdir("/"); -- -- nullfd = open("/dev/null", O_RDWR, 0); -- if (nullfd != -1) { -- (void) dup2(nullfd, 0); -- (void) dup2(nullfd, 1); -- (void) dup2(nullfd, 2); -- if (nullfd > 2) -- close(nullfd); -- } -- -- /* Propagate completion of daemon initialization */ -- completed = 1; -- (void) write(waiter[1], &completed, sizeof(completed)); -- close(waiter[0]); -- close(waiter[1]); -- } else { -- (void) chdir("/"); -- } -- return 0; -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. 
-+ */ -+ switch (fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void)read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void)chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void)dup2(nullfd, 0); -+ (void)dup2(nullfd, 1); -+ (void)dup2(nullfd, 2); -+ if (nullfd > 2) { -+ close(nullfd); -+ } -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void)write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void)chdir("/"); -+ } -+ return 0; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn) -+ struct fuse_conn_info *conn) - { -- if(opts->set_max_write) -- conn->max_write = opts->max_write; -- if(opts->set_max_background) -- conn->max_background = opts->max_background; -- if(opts->set_congestion_threshold) -- conn->congestion_threshold = opts->congestion_threshold; -- if(opts->set_time_gran) -- conn->time_gran = opts->time_gran; -- if(opts->set_max_readahead) -- conn->max_readahead = opts->max_readahead; -- --#define LL_ENABLE(cond,cap) \ -- if (cond) conn->want |= (cap) --#define LL_DISABLE(cond,cap) \ -- if (cond) conn->want &= ~(cap) -- -- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -- -- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -- -- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -- -- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- -- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); -- -- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -- -- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- -- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -- -- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+ if (opts->set_max_write) { -+ conn->max_write = opts->max_write; -+ } -+ if (opts->set_max_background) { -+ conn->max_background = opts->max_background; -+ } -+ if (opts->set_congestion_threshold) { -+ conn->congestion_threshold = opts->congestion_threshold; -+ } -+ if (opts->set_time_gran) { -+ conn->time_gran = opts->time_gran; -+ } -+ if (opts->set_max_readahead) { -+ conn->max_readahead = opts->max_readahead; -+ } -+ -+#define LL_ENABLE(cond, cap) \ -+ if (cond) \ -+ conn->want |= (cap) -+#define LL_DISABLE(cond, cap) \ -+ if (cond) \ -+ conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); - } - --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) - { -- struct fuse_conn_info_opts *opts; -- -- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -- if(opts == NULL) { -- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -- return NULL; -- } -- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -- free(opts); -- return NULL; -- } -- return opts; -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if (opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; - } -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 7c5f561..0b98275 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -28,23 +28,24 @@ - * operation - */ - static int mknod_wrapper(int dirfd, const char *path, const char *link, -- int mode, dev_t rdev) -+ int mode, dev_t rdev) - { -- int res; -+ int res; - -- if (S_ISREG(mode)) { -- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -- if (res >= 0) -- res = close(res); -- } else if (S_ISDIR(mode)) { -- res = mkdirat(dirfd, path, mode); -- } else if (S_ISLNK(mode) && link != NULL) { -- res = symlinkat(link, dirfd, path); -- } else if (S_ISFIFO(mode)) { -- res = mkfifoat(dirfd, path, mode); -- } else { -- res = mknodat(dirfd, path, mode, rdev); -- } -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) { -+ res = close(res); -+ } -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if 
(S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } - -- return res; -+ return res; - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e5f7115..c5850ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1,12 +1,12 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU GPLv2. -- See the file COPYING. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU GPLv2. -+ * See the file COPYING. -+ */ - --/** @file -+/* - * - * This file system mirrors the existing file system hierarchy of the - * system, starting at the root file system. This is implemented by -@@ -28,7 +28,8 @@ - * - * Compile with: - * -- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o -+ * passthrough_ll - * - * ## Source code ## - * \include passthrough_ll.c -@@ -39,1299 +40,1365 @@ - - #include "config.h" - --#include --#include --#include --#include --#include --#include --#include --#include --#include - #include -+#include - #include -+#include - #include -+#include - #include -+#include -+#include -+#include -+#include -+#include - #include - #include -+#include - - #include "passthrough_helpers.h" - --/* We are re-using pointers to our `struct lo_inode` and `struct -- lo_dirp` elements as inodes. This means that we must be able to -- store uintptr_t values in a fuse_ino_t variable. The following -- incantation checks this condition at compile time. 
*/ --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+/* -+ * We are re-using pointers to our `struct lo_inode` and `struct -+ * lo_dirp` elements as inodes. This means that we must be able to -+ * store uintptr_t values in a fuse_ino_t variable. The following -+ * incantation checks this condition at compile time. -+ */ -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); -+ "fuse_ino_t too small to hold uintptr_t values!"); - #else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -- { unsigned _uintptr_to_must_hold_fuse_ino_t: -- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -+ unsigned _uintptr_to_must_hold_fuse_ino_t -+ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); -+}; - #endif - - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ -- int fd; -- bool is_symlink; -- ino_t ino; -- dev_t dev; -- uint64_t refcount; /* protected by lo->mutex */ -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -- CACHE_ALWAYS, -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, - }; - - struct lo_data { -- pthread_mutex_t mutex; -- int debug; -- int writeback; -- int flock; -- int xattr; -- const char *source; -- double timeout; -- int cache; -- int timeout_set; -- struct lo_inode root; /* protected by lo->mutex */ -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ 
struct lo_inode root; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -- { "writeback", -- offsetof(struct lo_data, writeback), 1 }, -- { "no_writeback", -- offsetof(struct lo_data, writeback), 0 }, -- { "source=%s", -- offsetof(struct lo_data, source), 0 }, -- { "flock", -- offsetof(struct lo_data, flock), 1 }, -- { "no_flock", -- offsetof(struct lo_data, flock), 0 }, -- { "xattr", -- offsetof(struct lo_data, xattr), 1 }, -- { "no_xattr", -- offsetof(struct lo_data, xattr), 0 }, -- { "timeout=%lf", -- offsetof(struct lo_data, timeout), 0 }, -- { "timeout=", -- offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", -- offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", -- offsetof(struct lo_data, cache), CACHE_NORMAL }, -- { "cache=always", -- offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -- FUSE_OPT_END -+ { "writeback", offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", offsetof(struct lo_data, source), 0 }, -+ { "flock", offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "xattr", offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END - }; - - static struct lo_data *lo_data(fuse_req_t req) - { -- return (struct lo_data *) fuse_req_userdata(req); -+ return (struct lo_data *)fuse_req_userdata(req); - } - - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) -- return &lo_data(req)->root; -- else -- return (struct lo_inode *) (uintptr_t) ino; -+ if (ino == 
FUSE_ROOT_ID) { -+ return &lo_data(req)->root; -+ } else { -+ return (struct lo_inode *)(uintptr_t)ino; -+ } - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ return lo_inode(req, ino)->fd; - } - - static bool lo_debug(fuse_req_t req) - { -- return lo_data(req)->debug != 0; -+ return lo_data(req)->debug != 0; - } - --static void lo_init(void *userdata, -- struct fuse_conn_info *conn) -+static void lo_init(void *userdata, struct fuse_conn_info *conn) - { -- struct lo_data *lo = (struct lo_data*) userdata; -- -- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -- conn->want |= FUSE_CAP_EXPORT_SUPPORT; -- -- if (lo->writeback && -- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- conn->want |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -- } -+ struct lo_data *lo = (struct lo_data *)userdata; -+ -+ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ -+ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ } -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ } -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- struct stat buf; -- struct lo_data *lo = lo_data(req); -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); - -- (void) fi; -+ (void)fi; - -- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- 
return (void) fuse_reply_err(req, errno); -+ res = -+ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- fuse_reply_attr(req, &buf, lo->timeout); -+ fuse_reply_attr(req, &buf, lo->timeout); - } - - static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec *tv) -+ const struct timespec *tv) - { -- int res; -- char procname[64]; -- -- if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1 && errno == EINVAL) { -- /* Sorry, no race free way to set times on symlink. */ -- errno = EPERM; -- } -- return res; -- } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- return utimensat(AT_FDCWD, procname, tv, 0); -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); - } - - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int valid, struct fuse_file_info *fi) -+ int valid, struct fuse_file_info *fi) - { -- int saverr; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -- int res; -- -- if (valid & FUSE_SET_ATTR_MODE) { -- if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? -- attr->st_uid : (uid_t) -1; -- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
-- attr->st_gid : (gid_t) -1; -- -- res = fchownat(ifd, "", uid, gid, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- } -- if (valid & FUSE_SET_ATTR_SIZE) { -- if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -- struct timespec tv[2]; -- -- tv[0].tv_sec = 0; -- tv[1].tv_sec = 0; -- tv[0].tv_nsec = UTIME_OMIT; -- tv[1].tv_nsec = UTIME_OMIT; -- -- if (valid & FUSE_SET_ATTR_ATIME_NOW) -- tv[0].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_ATIME) -- tv[0] = attr->st_atim; -- -- if (valid & FUSE_SET_ATTR_MTIME_NOW) -- tv[1].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_MTIME) -- tv[1] = attr->st_mtim; -- -- if (fi) -- res = futimens(fi->fh, tv); -- else -- res = utimensat_empty_nofollow(inode, tv); -- if (res == -1) -- goto out_err; -- } -- -- return lo_getattr(req, ino, fi); -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; -+ -+ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) { -+ tv[0].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_ATIME) { -+ tv[0] = attr->st_atim; -+ } -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) { -+ tv[1].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_MTIME) { -+ tv[1] = attr->st_mtim; -+ } -+ -+ if (fi) { -+ res = futimens(fi->fh, tv); -+ } else { -+ res = utimensat_empty_nofollow(inode, tv); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ -+ return lo_getattr(req, ino, fi); - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { -- struct lo_inode *p; -- struct lo_inode *ret = NULL; -- -- pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- } -- } -- pthread_mutex_unlock(&lo->mutex); -- return ret; -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; - } - - static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -- struct fuse_entry_param *e) -+ struct fuse_entry_param *e) - { -- int newfd; -- int res; -- int saverr; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -- -- memset(e, 0, sizeof(*e)); -- e->attr_timeout = lo->timeout; -- e->entry_timeout = lo->timeout; -- -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -- if (newfd == -1) -- goto out_err; -- -- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- inode = lo_find(lo_data(req), &e->attr); -- if (inode) { -- close(newfd); -- newfd = -1; -- } else { -- struct lo_inode *prev, *next; -- -- saverr = ENOMEM; -- inode = calloc(1, sizeof(struct lo_inode)); -- if (!inode) -- goto out_err; -- -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -- inode->fd = newfd; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -- -- pthread_mutex_lock(&lo->mutex); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -- pthread_mutex_unlock(&lo->mutex); -- } -- e->ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e->ino); -- -- return 0; -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ if (newfd == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, 
sizeof(struct lo_inode)); -+ if (!inode) { -+ goto out_err; -+ } -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e->ino); -+ } -+ -+ return 0; - - out_err: -- saverr = errno; -- if (newfd != -1) -- close(newfd); -- return saverr; -+ saverr = errno; -+ if (newfd != -1) { -+ close(newfd); -+ } -+ return saverr; - } - - static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_entry(req, &e); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_entry(req, &e); -+ } - } - - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev, -- const char *link) -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) - { -- int res; -- int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -- struct fuse_entry_param e; -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; - -- saverr = ENOMEM; -+ saverr = ENOMEM; - -- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); - -- saverr = errno; -- if (res == -1) -- goto out; -+ saverr = errno; -+ if (res == -1) { -+ goto out; -+ } - -- saverr = lo_do_lookup(req, parent, name, &e); -- if (saverr) -- goto out; -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) { -+ goto out; -+ } - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e.ino); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } - -- fuse_reply_entry(req, &e); -- return; -+ fuse_reply_entry(req, &e); -+ return; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - --static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev) -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev) - { -- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); - } - - static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode) -+ mode_t mode) - { -- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); - } - --static void lo_symlink(fuse_req_t req, const char *link, -- fuse_ino_t parent, const char *name) -+static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name) - { -- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - - static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+ const char *name) - { -- int res; -- char procname[64]; -+ int res; -+ char procname[64]; - -- if (inode->is_symlink) { -- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { -- /* Sorry, no race free way to hard-link a symlink. */ -- errno = EPERM; -- } -- return res; -- } -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -- const char *name) -+ const char *name) - { -- int res; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -- struct fuse_entry_param e; -- int saverr; -- -- memset(&e, 0, sizeof(struct fuse_entry_param)); -- e.attr_timeout = lo->timeout; -- e.entry_timeout = lo->timeout; -- -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -- if (res == -1) -- goto out_err; -- -- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -- pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, -- (unsigned long long) e.ino); -- -- fuse_reply_entry(req, &e); -- return; -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if 
(res == -1) { -+ goto out_err; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } -+ -+ fuse_reply_entry(req, &e); -+ return; - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags) -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) - { -- int res; -+ int res; - -- if (flags) { -- fuse_reply_err(req, EINVAL); -- return; -- } -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - -- res = renameat(lo_fd(req, parent), name, -- lo_fd(req, newparent), newname); -+ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, 0); -+ res = unlinkat(lo_fd(req, parent), name, 0); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { -- if (!inode) -- return; -- -- pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- -- pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -- -- } else { -- pthread_mutex_unlock(&lo->mutex); -- } -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } - } - - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long) ino, -- (unsigned long long) inode->refcount, -- (unsigned long long) nlookup); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); -+ } - -- unref_inode(lo, inode, nlookup); -+ unref_inode(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- lo_forget_one(req, ino, nlookup); -- fuse_reply_none(req); -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); - } - - static void lo_forget_multi(fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets) -+ struct fuse_forget_data *forgets) - { -- int 
i; -+ int i; - -- for (i = 0; i < count; i++) -- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -- fuse_reply_none(req); -+ for (i = 0; i < count; i++) { -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ } -+ fuse_reply_none(req); - } - - static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - { -- char buf[PATH_MAX + 1]; -- int res; -+ char buf[PATH_MAX + 1]; -+ int res; - -- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -- if (res == -1) -- return (void) fuse_reply_err(req, errno); -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- if (res == sizeof(buf)) -- return (void) fuse_reply_err(req, ENAMETOOLONG); -+ if (res == sizeof(buf)) { -+ return (void)fuse_reply_err(req, ENAMETOOLONG); -+ } - -- buf[res] = '\0'; -+ buf[res] = '\0'; - -- fuse_reply_readlink(req, buf); -+ fuse_reply_readlink(req, buf); - } - - struct lo_dirp { -- DIR *dp; -- struct dirent *entry; -- off_t offset; -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; - }; - - static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) - { -- return (struct lo_dirp *) (uintptr_t) fi->fh; -+ return (struct lo_dirp *)(uintptr_t)fi->fh; - } - --static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- int error = ENOMEM; -- struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -- int fd; -- -- d = calloc(1, sizeof(struct lo_dirp)); -- if (d == NULL) -- goto out_err; -- -- fd = openat(lo_fd(req, ino), ".", O_RDONLY); -- if (fd == -1) -- goto out_errno; -- -- d->dp = fdopendir(fd); -- if (d->dp == NULL) -- goto out_errno; -- -- d->offset = 0; -- d->entry = NULL; -- -- fi->fh = (uintptr_t) d; -- if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -- return; -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int 
fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) { -+ goto out_err; -+ } -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) { -+ goto out_errno; -+ } -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) { -+ goto out_errno; -+ } -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t)d; -+ if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); -+ return; - - out_errno: -- error = errno; -+ error = errno; - out_err: -- if (d) { -- if (fd != -1) -- close(fd); -- free(d); -- } -- fuse_reply_err(req, error); -+ if (d) { -+ if (fd != -1) { -+ close(fd); -+ } -+ free(d); -+ } -+ fuse_reply_err(req, error); - } - - static int is_dot_or_dotdot(const char *name) - { -- return name[0] == '.' && (name[1] == '\0' || -- (name[1] == '.' && name[2] == '\0')); -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); - } - - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi, int plus) -+ off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -- char *p; -- size_t rem = size; -- int err; -- -- (void) ino; -- -- buf = calloc(1, size); -- if (!buf) { -- err = ENOMEM; -- goto error; -- } -- p = buf; -- -- if (offset != d->offset) { -- seekdir(d->dp, offset); -- d->entry = NULL; -- d->offset = offset; -- } -- while (1) { -- size_t entsize; -- off_t nextoff; -- const char *name; -- -- if (!d->entry) { -- errno = 0; -- d->entry = readdir(d->dp); -- if (!d->entry) { -- if (errno) { // Error -- err = errno; -- goto error; -- } else { // End of stream -- break; -- } -- } -- } -- nextoff = d->entry->d_off; -- name = d->entry->d_name; -- fuse_ino_t entry_ino = 0; -- if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param) { -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -- 
err = lo_do_lookup(req, ino, name, &e); -- if (err) -- goto error; -- entry_ino = e.ino; -- } -- -- entsize = fuse_add_direntry_plus(req, p, rem, name, -- &e, nextoff); -- } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, -- &st, nextoff); -- } -- if (entsize > rem) { -- if (entry_ino != 0) -- lo_forget_one(req, entry_ino, 1); -- break; -- } -- -- p += entsize; -- rem -= entsize; -- -- d->entry = NULL; -- d->offset = nextoff; -- } -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void)ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { /* Error */ -+ err = errno; -+ goto error; -+ } else { /* End of stream */ -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) { -+ goto error; -+ } -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) { -+ lo_forget_one(req, entry_ino, 1); -+ } -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } - - err = 0; - error: 
-- // If there's an error, we can only signal it if we haven't stored -- // any entries yet - otherwise we'd end up with wrong lookup -- // counts for the entries that are already in the buffer. So we -- // return what we've collected until that point. -- if (err && rem == size) -- fuse_reply_err(req, err); -- else -- fuse_reply_buf(req, buf, size - rem); -+ /* -+ * If there's an error, we can only signal it if we haven't stored -+ * any entries yet - otherwise we'd end up with wrong lookup -+ * counts for the entries that are already in the buffer. So we -+ * return what we've collected until that point. -+ */ -+ if (err && rem == size) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_buf(req, buf, size - rem); -+ } - free(buf); - } - - static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 0); -+ lo_do_readdir(req, ino, size, offset, fi, 0); - } - - static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 1); -+ lo_do_readdir(req, ino, size, offset, fi, 1); - } - --static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -- (void) ino; -- closedir(d->dp); -- free(d); -- fuse_reply_err(req, 0); -+ struct lo_dirp *d = lo_dirp(fi); -+ (void)ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); - } - - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi) -+ mode_t mode, struct fuse_file_info *fi) - { -- int fd; -- struct lo_data *lo = lo_data(req); -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- fd = openat(lo_fd(req, parent), name, -- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_create(req, &e, fi); -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ mode); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_create(req, &e, fi); -+ } - } - - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- int fd = dirfd(lo_dirp(fi)->dp); -- (void) ino; -- if (datasync) -- res = fdatasync(fd); -- else -- res = fsync(fd); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fd); -+ } else { -+ res = fsync(fd); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int fd; -- char buf[64]; -- struct lo_data *lo = lo_data(req); -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -- ino, fi->flags); -- -- /* With writeback cache, kernel may send read requests even -- when userspace opened write-only */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* With writeback cache, O_APPEND is handled by the kernel. -- This breaks atomicity (since the file may change in the -- underlying filesystem, so that the kernel's idea of the -- end of the file isn't accurate anymore). In this example, -- we just accept that. A more rigorous filesystem may want -- to return an error here */ -- if (lo->writeback && (fi->flags & O_APPEND)) -- fi->flags &= ~O_APPEND; -- -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); -+ } -+ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. 
A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (lo->writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); - } - --static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_release(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- (void) ino; -+ (void)ino; - -- close(fi->fh); -- fuse_reply_err(req, 0); -+ close(fi->fh); -+ fuse_reply_err(req, 0); - } - - static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- res = close(dup(fi->fh)); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- if (datasync) -- res = fdatasync(fi->fh); -- else -- res = fsync(fi->fh); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fi->fh); -+ } else { -+ res = fsync(fi->fh); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, -+ struct fuse_file_info *fi) - { -- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", ino, size, (unsigned long) offset); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); -+ } - -- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -- buf.buf[0].pos = offset; -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *in_buf, off_t off, -- struct fuse_file_info *fi) -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) - { -- (void) ino; -- ssize_t res; -- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -- -- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -- out_buf.buf[0].pos = off; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -- ino, out_buf.buf[0].size, (unsigned long) off); -- -- res = fuse_buf_copy(&out_buf, in_buf, 0); -- if(res < 0) -- fuse_reply_err(req, -res); -- else -- fuse_reply_write(req, (size_t) res); -+ (void)ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); -+ } -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if (res < 0) { -+ fuse_reply_err(req, -res); -+ } else { -+ fuse_reply_write(req, (size_t)res); -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) - { -- int res; -- struct statvfs stbuf; -- -- res = fstatvfs(lo_fd(req, ino), &stbuf); -- if (res == -1) -- fuse_reply_err(req, errno); -- else -- fuse_reply_statfs(req, &stbuf); -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ fuse_reply_statfs(req, &stbuf); -+ } - } - --static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi) -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi) - { -- int err = EOPNOTSUPP; -- (void) ino; -+ int err = EOPNOTSUPP; -+ (void)ino; - - #ifdef HAVE_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -- if (err < 0) -- err = errno; -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) { -+ err = errno; -+ } - - #elif defined(HAVE_POSIX_FALLOCATE) -- if (mode) { -- fuse_reply_err(req, EOPNOTSUPP); -- return; -- } -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(fi->fh, offset, length); - #endif - -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - - static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- int op) -+ int op) - { -- int res; -- (void) ino; -+ int res; -+ (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(fi->fh, op); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size) -+ size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -- ino, name, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = getxattr(procname, name, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = getxattr(procname, name, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -+ size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = listxattr(procname, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = listxattr(procname, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags) -+ const char *value, size_t size, int flags) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if 
(!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = setxattr(procname, name, value, size, flags); -- saverr = ret == -1 ? errno : 0; -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = removexattr(procname, name); -- saverr = ret == -1 ? errno : 0; -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - #ifdef HAVE_COPY_FILE_RANGE - static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -- struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags) -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags) - { -- ssize_t res; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -- len, flags); -- -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -- flags); -- if (res < 0) -- fuse_reply_err(req, -errno); -- else -- fuse_reply_write(req, res); -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -+ flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ if (res < 0) { -+ fuse_reply_err(req, -errno); -+ } else { -+ fuse_reply_write(req, res); -+ } - } - #endif - - static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- off_t res; -- -- (void)ino; -- res = lseek(fi->fh, off, whence); -- if (res != -1) -- fuse_reply_lseek(req, res); -- else -- fuse_reply_err(req, errno); -+ off_t res; -+ -+ (void)ino; -+ res = 
lseek(fi->fh, off, whence); -+ if (res != -1) { -+ fuse_reply_lseek(req, res); -+ } else { -+ fuse_reply_err(req, errno); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { -- .init = lo_init, -- .lookup = lo_lookup, -- .mkdir = lo_mkdir, -- .mknod = lo_mknod, -- .symlink = lo_symlink, -- .link = lo_link, -- .unlink = lo_unlink, -- .rmdir = lo_rmdir, -- .rename = lo_rename, -- .forget = lo_forget, -- .forget_multi = lo_forget_multi, -- .getattr = lo_getattr, -- .setattr = lo_setattr, -- .readlink = lo_readlink, -- .opendir = lo_opendir, -- .readdir = lo_readdir, -- .readdirplus = lo_readdirplus, -- .releasedir = lo_releasedir, -- .fsyncdir = lo_fsyncdir, -- .create = lo_create, -- .open = lo_open, -- .release = lo_release, -- .flush = lo_flush, -- .fsync = lo_fsync, -- .read = lo_read, -- .write_buf = lo_write_buf, -- .statfs = lo_statfs, -- .fallocate = lo_fallocate, -- .flock = lo_flock, -- .getxattr = lo_getxattr, -- .listxattr = lo_listxattr, -- .setxattr = lo_setxattr, -- .removexattr = lo_removexattr, -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, - #ifdef HAVE_COPY_FILE_RANGE -- .copy_file_range = lo_copy_file_range, -+ .copy_file_range = lo_copy_file_range, - 
#endif -- .lseek = lo_lseek, -+ .lseek = lo_lseek, - }; - - int main(int argc, char *argv[]) - { -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse_session *se; -- struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, -- .writeback = 0 }; -- int ret = -1; -- -- /* Don't mask creation mode, kernel already did that */ -- umask(0); -- -- pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -- lo.root.fd = -1; -- lo.cache = CACHE_NORMAL; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -- fuse_cmdline_help(); -- fuse_lowlevel_help(); -- ret = 0; -- goto err_out1; -- } else if (opts.show_version) { -- fuse_lowlevel_version(); -- ret = 0; -- goto err_out1; -- } -- -- if(opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- -- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -- return 1; -- -- lo.debug = opts.debug; -- lo.root.refcount = 2; -- if (lo.source) { -- struct stat stat; -- int res; -- -- res = lstat(lo.source, &stat); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -- lo.source); -- exit(1); -- } -- if (!S_ISDIR(stat.st_mode)) { -- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -- exit(1); -- } -- -- } else { -- lo.source = "/"; -- } -- lo.root.is_symlink = false; -- if (!lo.timeout_set) { -- switch (lo.cache) { -- case CACHE_NEVER: -- lo.timeout = 0.0; -- break; -- -- case CACHE_NORMAL: -- lo.timeout = 1.0; -- break; -- -- case CACHE_ALWAYS: -- lo.timeout = 86400.0; -- break; -- } -- } else if (lo.timeout < 0) { -- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -- lo.timeout); -- exit(1); -- } -- -- lo.root.fd = open(lo.source, O_PATH); -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", -- lo.source); -- exit(1); -- } -- -- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -- if (se == NULL) -- goto err_out1; -- -- if (fuse_set_signal_handlers(se) != 0) -- goto err_out2; -- -- if (fuse_session_mount(se, opts.mountpoint) != 0) -- goto err_out3; -- -- fuse_daemonize(opts.foreground); -- -- /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) -- ret = fuse_session_loop(se); -- else -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- -- fuse_session_unmount(se); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) { -+ return 1; -+ } -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if (opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -+ return 1; -+ } -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 
1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) { -+ goto err_out1; -+ } -+ -+ if (fuse_set_signal_handlers(se) != 0) { -+ goto err_out2; -+ } -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ goto err_out3; -+ } -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) { -+ ret = fuse_session_loop(se); -+ } else { -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ } -+ -+ fuse_session_unmount(se); - err_out3: -- fuse_remove_signal_handlers(se); -+ fuse_remove_signal_handlers(se); - err_out2: -- fuse_session_destroy(se); -+ fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); - -- if (lo.root.fd >= 0) -- close(lo.root.fd); -+ if (lo.root.fd >= 0) { -+ close(lo.root.fd); -+ } - -- return ret ? 1 : 0; -+ return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Handle-hard-reboot.patch b/kvm-virtiofsd-Handle-hard-reboot.patch deleted file mode 100644 index 8888030..0000000 --- a/kvm-virtiofsd-Handle-hard-reboot.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:41 +0100 -Subject: [PATCH 070/116] virtiofsd: Handle hard reboot -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-67-dgilbert@redhat.com> -Patchwork-id: 93521 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Handle a - mount - hard reboot (without unmount) - mount - -we get another 'init' which FUSE doesn't normally expect. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7d742b5..65f91da 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, - goto reply_err; - } - } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -- goto reply_err; -+ if (fuse_lowlevel_is_virtio(se)) { -+ /* -+ * TODO: This is after a hard reboot typically, we need to do -+ * a destroy, but we can't reply to this request yet so -+ * we can't use do_destroy -+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); -+ se->got_destroy = 1; -+ se->got_init = 0; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } else { -+ goto reply_err; -+ } - } - - err = EACCES; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Handle-reinit.patch b/kvm-virtiofsd-Handle-reinit.patch deleted file mode 100644 index 3f9577b..0000000 --- a/kvm-virtiofsd-Handle-reinit.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:40 +0100 -Subject: [PATCH 069/116] virtiofsd: Handle reinit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-66-dgilbert@redhat.com> -Patchwork-id: 93520 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Allow init->destroy->init for mount->umount->mount - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index a7a1968..7d742b5 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - - se->got_init = 1; -+ se->got_destroy = 0; - if (se->op.init) { - se->op.init(se->userdata, &se->conn); - } -@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - (void)iter; - - se->got_destroy = 1; -+ se->got_init = 0; - if (se->op.destroy) { - se->op.destroy(se->userdata); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Keep-track-of-replies.patch b/kvm-virtiofsd-Keep-track-of-replies.patch deleted file mode 100644 index 18be3e0..0000000 --- a/kvm-virtiofsd-Keep-track-of-replies.patch +++ /dev/null @@ -1,116 +0,0 @@ -From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:04 +0100 -Subject: [PATCH 033/116] virtiofsd: Keep track of replies -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-30-dgilbert@redhat.com> -Patchwork-id: 93481 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Keep track of whether we sent a reply to a request; this is a bit -paranoid but it means: - a) We should always recycle an element even if there was an error - in the request - b) Never try and send two replies on one queue element - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- - 1 file changed, 20 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05d0e29..f1adeb6 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -44,6 +44,7 @@ struct fv_QueueInfo { - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -+ bool reply_sent; - }; - - /* -@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - { - VuVirtqElement *elem; - VuVirtq *q; -+ int ret = 0; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - assert(out->unique); - /* For virtio we always have ch */ - assert(ch); -+ assert(!ch->qi->reply_sent); - elem = ch->qi->qe; - q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; - -@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - if (in_len < sizeof(struct fuse_out_header)) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", - __func__, elem->index); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - if (in_len < tosend_len) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", - __func__, elem->index, tosend_len); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); - vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); - vu_queue_notify(&se->virtio_dev->dev, q); -+ ch->qi->reply_sent = true; - -- return 0; -+err: -+ return ret; - } - - /* Thread function for individual queues, created when a queue is 'started' */ -@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) - break; - } - -+ qi->qe = elem; -+ qi->reply_sent = false; -+ - if (!fbuf.mem) { - fbuf.mem = malloc(se->bufsize); - assert(fbuf.mem); -@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -+ if (!qi->reply_sent) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -+ __func__, elem->index); -+ /* I think we've still got to recycle the element */ -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ } - qi->qe = NULL; - free(elem); - elem = NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch deleted file mode 100644 index 5e054f3..0000000 --- a/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +++ /dev/null @@ -1,143 +0,0 @@ -From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:42 +0100 -Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-68-dgilbert@redhat.com> -Patchwork-id: 93522 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Kill the threads we've started when the queues get stopped. - -Signed-off-by: Dr. David Alan Gilbert -With improvements by: -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ - 1 file changed, 44 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 872968f..7a8774a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,7 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ int kill_fd; /* For killing the thread */ - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- struct pollfd pf[1]; -+ struct pollfd pf[2]; - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -+ pf[1].fd = qi->kill_fd; -+ pf[1].events = POLLIN; -+ pf[1].revents = 0; - - fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, - qi->qidx); -- int poll_res = ppoll(pf, 1, 
NULL, NULL); -+ int poll_res = ppoll(pf, 2, NULL, NULL); - - if (poll_res == -1) { - if (errno == EINTR) { -@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); - break; - } -- assert(poll_res == 1); -+ assert(poll_res >= 1); - if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { - fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", - __func__, pf[0].revents, qi->qidx); - break; - } -+ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: Unexpected poll revents %x Queue %d killfd\n", -+ __func__, pf[1].revents, qi->qidx); -+ break; -+ } -+ if (pf[1].revents) { -+ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", -+ __func__, qi->qidx); -+ break; -+ } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, - qi->qidx); -@@ -589,6 +604,28 @@ out: - return NULL; - } - -+static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) -+{ -+ int ret; -+ struct fv_QueueInfo *ourqi; -+ -+ assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ -+ /* Kill the thread */ -+ if (eventfd_write(ourqi->kill_fd, 1)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", -+ qidx, strerror(errno)); -+ } -+ ret = pthread_join(ourqi->thread, NULL); -+ if (ret) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", -+ __func__, qidx, ret); -+ } -+ close(ourqi->kill_fd); -+ ourqi->kick_fd = -1; -+} -+ - /* Callback from libvhost-user on start or stop of a queue */ - static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - { -@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - } - ourqi = vud->qi[qidx]; - ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ -+ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); -+ assert(ourqi->kill_fd != -1); - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); - assert(0); - } - } else { -- /* TODO: Kill the thread */ -- assert(qidx < vud->nqueues); -- ourqi = vud->qi[qidx]; -- ourqi->kick_fd = -1; -+ fv_queue_cleanup_thread(vud, qidx); - } - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch deleted file mode 100644 index 98211cb..0000000 --- a/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +++ /dev/null @@ -1,96 +0,0 @@ -From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:53 +0100 -Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is - passed in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-19-dgilbert@redhat.com> -Patchwork-id: 93472 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If caller has not sent file handle in request, then using inode, retrieve -the fd opened using O_PATH and use that to open file again and issue -fsync. This will be needed when dax_flush() calls fsync. At that time -we only have inode information (and not file). - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 6 +++++- - tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- - 2 files changed, 31 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 514d79c..8552cfb 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - - if (req->se->op.fsync) { -- req->se->op.fsync(req, nodeid, datasync, &fi); -+ if (fi.fh == (uint64_t)-1) { -+ req->se->op.fsync(req, nodeid, datasync, NULL); -+ } else { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } - } else { - fuse_reply_err(req, ENOSYS); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6c4da18..26ac870 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - { - int res; - (void)ino; -+ int fd; -+ char *buf; -+ -+ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, -+ (void *)fi); -+ -+ if (!fi) { -+ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fd = open(buf, O_RDWR); -+ free(buf); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ } else { -+ fd = fi->fh; -+ } -+ - if (datasync) { -- res = fdatasync(fi->fh); -+ res = fdatasync(fd); - } else { -- res = fsync(fi->fh); -+ res = fsync(fd); -+ } -+ if (!fi) { -+ close(fd); - } - fuse_reply_err(req, res == -1 ? 
errno : 0); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch deleted file mode 100644 index 2c9874d..0000000 --- a/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +++ /dev/null @@ -1,257 +0,0 @@ -From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:56 +0100 -Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-22-dgilbert@redhat.com> -Patchwork-id: 93476 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When run with vhost-user options we conect to the QEMU instead -via a socket. Start this off by creating the socket. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 7 ++-- - tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ - 4 files changed, 114 insertions(+), 50 deletions(-) - create mode 100644 tools/virtiofsd/fuse_virtio.c - create mode 100644 tools/virtiofsd/fuse_virtio.h - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 26b1a7d..82d6ac7 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,9 +6,10 @@ - * See the file COPYING.LIB - */ - --#define FUSE_USE_VERSION 31 -- -+#ifndef FUSE_I_H -+#define FUSE_I_H - -+#define FUSE_USE_VERSION 31 - #include "fuse.h" - #include "fuse_lowlevel.h" - -@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -+#endif -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 17e8718..5df124e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -14,6 +14,7 @@ - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -+ if (!se->vu_socket_path) { -+ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ goto out4; -+ } -+ - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; - - list_init_req(&se->list); -@@ -2224,54 +2230,7 @@ out1: - - int fuse_session_mount(struct fuse_session *se) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. 
-- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) { -- close(fd); -- } -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) { -- return -1; -- } -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) { -- goto error_out; -- } -- -- return 0; -- --error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ return virtio_session_mount(se); - } - - int fuse_session_fd(struct fuse_session *se) -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -new file mode 100644 -index 0000000..cbef6ff ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -0,0 +1,79 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#include "fuse_i.h" -+#include "standard-headers/linux/fuse.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_virtio.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* From spec */ -+struct virtio_fs_config { -+ char tag[36]; -+ uint32_t num_queues; -+}; -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ struct sockaddr_un un; -+ mode_t old_umask; -+ -+ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); -+ return -1; -+ } -+ -+ se->fd = -1; -+ -+ /* -+ * Create the Unix socket to communicate with qemu -+ * based on QEMU's vhost-user-bridge -+ */ -+ unlink(se->vu_socket_path); -+ strcpy(un.sun_path, se->vu_socket_path); -+ size_t addr_len = sizeof(un); -+ -+ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (listen_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); -+ return -1; -+ } -+ un.sun_family = AF_UNIX; -+ -+ /* -+ * Unfortunately bind doesn't let you set the mask on the socket, -+ * so set umask to 077 and restore it later. -+ */ -+ old_umask = umask(0077); -+ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ umask(old_umask); -+ return -1; -+ } -+ umask(old_umask); -+ -+ if (listen(listen_sock, 1) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ return -1; -+ } -+ -+ return -1; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -new file mode 100644 -index 0000000..8f2edb6 ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -0,0 +1,23 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#ifndef FUSE_VIRTIO_H -+#define FUSE_VIRTIO_H -+ -+#include "fuse_i.h" -+ -+struct fuse_session; -+ -+int virtio_session_mount(struct fuse_session *se); -+ -+#endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch deleted file mode 100644 index 8d8de78..0000000 --- a/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +++ /dev/null @@ -1,76 +0,0 @@ -From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:31 +0100 -Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-57-dgilbert@redhat.com> -Patchwork-id: 93505 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it -to the filesystem. - -Signed-off-by: Vivek Goyal -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_common.h | 6 +++++- - tools/virtiofsd/fuse_lowlevel.c | 4 +++- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index f8f6433..686c42c 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -93,8 +93,12 @@ struct fuse_file_info { - */ - unsigned int cache_readdir:1; - -+ /* Indicates that suid/sgid bits should be removed upon write */ -+ unsigned int kill_priv:1; -+ -+ - /** Padding. 
Reserved for future use*/ -- unsigned int padding:25; -+ unsigned int padding:24; - unsigned int padding2:32; - - /* -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 02e1d83..2d6dc5a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; - fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - if (ibufv->count == 1) { - assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch deleted file mode 100644 index 7d095c9..0000000 --- a/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +++ /dev/null @@ -1,140 +0,0 @@ -From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:20 +0100 -Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-46-dgilbert@redhat.com> -Patchwork-id: 93497 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. 
David Alan Gilbert" - -Pass the write iov pointing to guest RAM all the way through rather -than copying the data. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Xiao Yang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 73 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fd588a4..872968f 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - - while (1) { -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ - /* - * An element contains one request and the space to send our - * response They're spread over multiple descriptors in a -@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) - __func__, elem->index); - assert(0); /* TODO */ - } -- copy_from_iov(&fbuf, out_num, out_sg); -- fbuf.size = out_len; -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. 
-+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header after the fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ allocated_bufv = true; -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ vu_queue_unpop(dev, q, elem, 0); -+ free(elem); -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; - -- /* TODO! Endianness of header */ -+ /* TODO! 
Endianness of header */ - -- /* TODO: Add checks for fuse_session_exited */ -- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -- fuse_session_process_buf_int(se, &bufv, &ch); -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &ch); -+ -+ if (allocated_bufv) { -+ free(pbufv); -+ } - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) - elem = NULL; - } - } -+out: - pthread_mutex_destroy(&ch.lock); - free(fbuf.mem); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch deleted file mode 100644 index 834ced1..0000000 --- a/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:19 +0100 -Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-45-dgilbert@redhat.com> -Patchwork-id: 93499 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Let fuse_session_process_buf_int take a fuse_bufvec * instead of a -fuse_buf; and then through to do_write_buf - where in the best -case it can pass that straight through to op.write_buf without copying -(other than skipping a header). - -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 +- - tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- - tools/virtiofsd/fuse_virtio.c | 3 +- - 3 files changed, 44 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 45995f3..a20854f 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch); - - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 95f4db8..7e10995 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) -+ struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -+ struct fuse_bufvec *pbufv = ibufv; -+ struct fuse_bufvec tmpbufv = { -+ .buf[0] = ibufv->buf[0], - .count = 1, - }; - struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -+ if (ibufv->count == 1) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ tmpbufv.buf[0].mem = PARAM(arg); -+ } -+ tmpbufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ if (tmpbufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ tmpbufv.buf[0].size = arg->size; -+ pbufv = &tmpbufv; -+ } else { -+ /* -+ * Input bufv contains the headers in the first element -+ * and the data in the rest, we need to skip that first element -+ */ -+ ibufv->buf[0].size = 0; - } -- bufv.buf[0].size = arg->size; - -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, NULL); - } - -+/* -+ * Restriction: -+ * bufv is normally a single entry buffer, except for a write -+ * where (if it's in memory) then the bufv may be multiple entries, -+ * where the first entry contains all headers and subsequent entries -+ * contain data -+ * bufv shall not use any offsets etc to make the data anything -+ * other than contiguous starting from 0. 
-+ */ - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch) - { -+ const struct fuse_buf *buf = bufv->buf; - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, buf); -+ do_write_buf(req, in->nodeid, inarg, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 635f877..fd588a4 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) - /* TODO! Endianness of header */ - - /* TODO: Add checks for fuse_session_exited */ -- fuse_session_process_buf_int(se, &fbuf, &ch); -+ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, &ch); - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/kvm-virtiofsd-Poll-kick_fd-for-queue.patch deleted file mode 100644 index d7c6c0a..0000000 --- a/kvm-virtiofsd-Poll-kick_fd-for-queue.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:01 +0100 -Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-27-dgilbert@redhat.com> -Patchwork-id: 93483 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In the queue thread poll the kick_fd we're passed. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 39 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2a94bb3..05e7258 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- /* TODO */ -+ struct pollfd pf[1]; -+ pf[0].fd = qi->kick_fd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, -+ qi->qidx); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", -+ __func__, pf[0].revents, qi->qidx); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, -+ qi->qidx); -+ -+ eventfd_t evalue; -+ if (eventfd_read(qi->kick_fd, &evalue)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); -+ break; -+ } -+ if (qi->virtio_dev->se->debug) { -+ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -+ qi->qidx, (size_t)evalue); -+ } - } - - return NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch deleted file mode 100644 index d4e1ea1..0000000 --- a/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +++ /dev/null @@ -1,144 +0,0 @@ -From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:59 +0100 -Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same - vhost_user_socket -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-85-dgilbert@redhat.com> -Patchwork-id: 93541 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd can run multiply even if the vhost_user_socket is same path. - - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [1] 244965 - virtio_session_mount: Waiting for vhost-user socket connection... - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [2] 244966 - virtio_session_mount: Waiting for vhost-user socket connection... 
- ]# - -The user will get confused about the situation and maybe the cause of the -unexpected problem. So it's better to prevent the multiple running. - -Create a regular file under localstatedir directory to exclude the -vhost_user_socket. To create and lock the file, use qemu_write_pidfile() -because the API has some sanity checks and file lock. - -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert - Applied fixes from Stefan's review and moved osdep include -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 1 + - tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 440508a..aac282f 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -18,6 +18,7 @@ - - #include - #include -+#include - #include - #include - #include -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index e7bd772..b7948de 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -13,11 +13,12 @@ - - #include "qemu/osdep.h" - #include "qemu/iov.h" --#include "fuse_virtio.h" -+#include "qapi/error.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - -+static void strreplace(char *s, char old, char new) -+{ -+ for (; *s; ++s) { -+ if (*s == old) { -+ *s = new; -+ } -+ } -+} -+ -+static bool fv_socket_lock(struct fuse_session *se) -+{ -+ g_autofree gchar *sk_name = NULL; -+ g_autofree gchar *pidfile = NULL; -+ g_autofree gchar *dir = NULL; -+ 
Error *local_err = NULL; -+ -+ dir = qemu_get_local_state_pathname("run/virtiofsd"); -+ -+ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", -+ __func__, dir, strerror(errno)); -+ return false; -+ } -+ -+ sk_name = g_strdup(se->vu_socket_path); -+ strreplace(sk_name, '/', '.'); -+ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); -+ -+ if (!qemu_write_pidfile(pidfile, &local_err)) { -+ error_report_err(local_err); -+ return false; -+ } -+ -+ return true; -+} -+ - static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) - return -1; - } - -+ if (!strlen(se->vu_socket_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); -+ return -1; -+ } -+ -+ /* Check the vu_socket_path is already used */ -+ if (!fv_socket_lock(se)) { -+ return -1; -+ } -+ - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch deleted file mode 100644 index f30f23a..0000000 --- a/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +++ /dev/null @@ -1,945 +0,0 @@ -From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:40 +0100 -Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-6-dgilbert@redhat.com> -Patchwork-id: 93460 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. 
David Alan Gilbert" - -Update scripts/update-linux-headers.sh to add fuse.h and -use it to pull in fuse.h from the kernel; from v5.5-rc1 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) -Signed-off-by: Miroslav Rezanina ---- - include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ - scripts/update-linux-headers.sh | 1 + - 2 files changed, 892 insertions(+) - create mode 100644 include/standard-headers/linux/fuse.h - -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -new file mode 100644 -index 0000000..f4df0a4 ---- /dev/null -+++ b/include/standard-headers/linux/fuse.h -@@ -0,0 +1,891 @@ -+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ -+/* -+ This file defines the kernel interface of FUSE -+ Copyright (C) 2001-2008 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPL. -+ See the file COPYING. -+ -+ This -- and only this -- header file may also be distributed under -+ the terms of the BSD Licence as follows: -+ -+ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ 1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ 2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. 
-+ -+ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE -+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ SUCH DAMAGE. -+*/ -+ -+/* -+ * This file defines the kernel interface of FUSE -+ * -+ * Protocol changelog: -+ * -+ * 7.1: -+ * - add the following messages: -+ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, -+ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, -+ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, -+ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, -+ * FUSE_RELEASEDIR -+ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels -+ * -+ * 7.2: -+ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags -+ * - add FUSE_FSYNCDIR message -+ * -+ * 7.3: -+ * - add FUSE_ACCESS message -+ * - add FUSE_CREATE message -+ * - add filehandle to fuse_setattr_in -+ * -+ * 7.4: -+ * - add frsize to fuse_kstatfs -+ * - clean up request size limit checking -+ * -+ * 7.5: -+ * - add flags and max_write to fuse_init_out -+ * -+ * 7.6: -+ * - add max_readahead to fuse_init_in and fuse_init_out -+ * -+ * 7.7: -+ * - add FUSE_INTERRUPT message -+ * - add POSIX file lock support -+ * -+ * 7.8: -+ * - add lock_owner and flags fields to fuse_release_in -+ * - add FUSE_BMAP message -+ * - add FUSE_DESTROY message -+ * -+ * 7.9: -+ * - new 
fuse_getattr_in input argument of GETATTR -+ * - add lk_flags in fuse_lk_in -+ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in -+ * - add blksize field to fuse_attr -+ * - add file flags field to fuse_read_in and fuse_write_in -+ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in -+ * -+ * 7.10 -+ * - add nonseekable open flag -+ * -+ * 7.11 -+ * - add IOCTL message -+ * - add unsolicited notification support -+ * - add POLL message and NOTIFY_POLL notification -+ * -+ * 7.12 -+ * - add umask flag to input argument of create, mknod and mkdir -+ * - add notification messages for invalidation of inodes and -+ * directory entries -+ * -+ * 7.13 -+ * - make max number of background requests and congestion threshold -+ * tunables -+ * -+ * 7.14 -+ * - add splice support to fuse device -+ * -+ * 7.15 -+ * - add store notify -+ * - add retrieve notify -+ * -+ * 7.16 -+ * - add BATCH_FORGET request -+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct -+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' -+ * - add FUSE_IOCTL_32BIT flag -+ * -+ * 7.17 -+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK -+ * -+ * 7.18 -+ * - add FUSE_IOCTL_DIR flag -+ * - add FUSE_NOTIFY_DELETE -+ * -+ * 7.19 -+ * - add FUSE_FALLOCATE -+ * -+ * 7.20 -+ * - add FUSE_AUTO_INVAL_DATA -+ * -+ * 7.21 -+ * - add FUSE_READDIRPLUS -+ * - send the requested events in POLL request -+ * -+ * 7.22 -+ * - add FUSE_ASYNC_DIO -+ * -+ * 7.23 -+ * - add FUSE_WRITEBACK_CACHE -+ * - add time_gran to fuse_init_out -+ * - add reserved space to fuse_init_out -+ * - add FATTR_CTIME -+ * - add ctime and ctimensec to fuse_setattr_in -+ * - add FUSE_RENAME2 request -+ * - add FUSE_NO_OPEN_SUPPORT flag -+ * -+ * 7.24 -+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support -+ * -+ * 7.25 -+ * - add FUSE_PARALLEL_DIROPS -+ * -+ * 7.26 -+ * - add FUSE_HANDLE_KILLPRIV -+ * - add FUSE_POSIX_ACL -+ * -+ * 7.27 -+ * - add FUSE_ABORT_ERROR -+ * -+ * 7.28 -+ * - 
add FUSE_COPY_FILE_RANGE -+ * - add FOPEN_CACHE_DIR -+ * - add FUSE_MAX_PAGES, add max_pages to init_out -+ * - add FUSE_CACHE_SYMLINKS -+ * -+ * 7.29 -+ * - add FUSE_NO_OPENDIR_SUPPORT flag -+ * -+ * 7.30 -+ * - add FUSE_EXPLICIT_INVAL_DATA -+ * - add FUSE_IOCTL_COMPAT_X32 -+ * -+ * 7.31 -+ * - add FUSE_WRITE_KILL_PRIV flag -+ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING -+ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag -+ */ -+ -+#ifndef _LINUX_FUSE_H -+#define _LINUX_FUSE_H -+ -+#include -+ -+/* -+ * Version negotiation: -+ * -+ * Both the kernel and userspace send the version they support in the -+ * INIT request and reply respectively. -+ * -+ * If the major versions match then both shall use the smallest -+ * of the two minor versions for communication. -+ * -+ * If the kernel supports a larger major version, then userspace shall -+ * reply with the major version it supports, ignore the rest of the -+ * INIT message and expect a new INIT message from the kernel with a -+ * matching major version. -+ * -+ * If the library supports a larger major version, then it shall fall -+ * back to the major protocol version sent by the kernel for -+ * communication and reply with that major version (and an arbitrary -+ * supported minor version). 
-+ */ -+ -+/** Version number of this interface */ -+#define FUSE_KERNEL_VERSION 7 -+ -+/** Minor version number of this interface */ -+#define FUSE_KERNEL_MINOR_VERSION 31 -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/* Make sure all structures are padded to 64bit boundary, so 32bit -+ userspace works under 64bit kernels */ -+ -+struct fuse_attr { -+ uint64_t ino; -+ uint64_t size; -+ uint64_t blocks; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t nlink; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t rdev; -+ uint32_t blksize; -+ uint32_t padding; -+}; -+ -+struct fuse_kstatfs { -+ uint64_t blocks; -+ uint64_t bfree; -+ uint64_t bavail; -+ uint64_t files; -+ uint64_t ffree; -+ uint32_t bsize; -+ uint32_t namelen; -+ uint32_t frsize; -+ uint32_t padding; -+ uint32_t spare[6]; -+}; -+ -+struct fuse_file_lock { -+ uint64_t start; -+ uint64_t end; -+ uint32_t type; -+ uint32_t pid; /* tgid */ -+}; -+ -+/** -+ * Bitmasks for fuse_setattr_in.valid -+ */ -+#define FATTR_MODE (1 << 0) -+#define FATTR_UID (1 << 1) -+#define FATTR_GID (1 << 2) -+#define FATTR_SIZE (1 << 3) -+#define FATTR_ATIME (1 << 4) -+#define FATTR_MTIME (1 << 5) -+#define FATTR_FH (1 << 6) -+#define FATTR_ATIME_NOW (1 << 7) -+#define FATTR_MTIME_NOW (1 << 8) -+#define FATTR_LOCKOWNER (1 << 9) -+#define FATTR_CTIME (1 << 10) -+ -+/** -+ * Flags returned by the OPEN request -+ * -+ * FOPEN_DIRECT_IO: bypass page cache for this open file -+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open -+ * FOPEN_NONSEEKABLE: the file is not seekable -+ * FOPEN_CACHE_DIR: allow caching this directory -+ * FOPEN_STREAM: the file is stream-like (no file position at all) -+ */ -+#define FOPEN_DIRECT_IO (1 << 0) -+#define FOPEN_KEEP_CACHE (1 << 1) -+#define FOPEN_NONSEEKABLE (1 << 2) -+#define FOPEN_CACHE_DIR (1 << 3) -+#define FOPEN_STREAM (1 << 4) -+ -+/** -+ * INIT 
request/reply flags -+ * -+ * FUSE_ASYNC_READ: asynchronous read requests -+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks -+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) -+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem -+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." -+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB -+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations -+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device -+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device -+ * FUSE_SPLICE_READ: kernel supports splice read on the device -+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks -+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories -+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages -+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) -+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus -+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission -+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes -+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens -+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir -+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc -+ * FUSE_POSIX_ACL: filesystem supports posix acls -+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED -+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages -+ * FUSE_CACHE_SYMLINKS: cache READLINK responses -+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir -+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request -+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid -+ */ -+#define FUSE_ASYNC_READ (1 << 0) -+#define FUSE_POSIX_LOCKS (1 << 1) -+#define FUSE_FILE_OPS (1 << 2) -+#define FUSE_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_EXPORT_SUPPORT (1 << 4) -+#define 
FUSE_BIG_WRITES (1 << 5) -+#define FUSE_DONT_MASK (1 << 6) -+#define FUSE_SPLICE_WRITE (1 << 7) -+#define FUSE_SPLICE_MOVE (1 << 8) -+#define FUSE_SPLICE_READ (1 << 9) -+#define FUSE_FLOCK_LOCKS (1 << 10) -+#define FUSE_HAS_IOCTL_DIR (1 << 11) -+#define FUSE_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_DO_READDIRPLUS (1 << 13) -+#define FUSE_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_ASYNC_DIO (1 << 15) -+#define FUSE_WRITEBACK_CACHE (1 << 16) -+#define FUSE_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_PARALLEL_DIROPS (1 << 18) -+#define FUSE_HANDLE_KILLPRIV (1 << 19) -+#define FUSE_POSIX_ACL (1 << 20) -+#define FUSE_ABORT_ERROR (1 << 21) -+#define FUSE_MAX_PAGES (1 << 22) -+#define FUSE_CACHE_SYMLINKS (1 << 23) -+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) -+#define FUSE_MAP_ALIGNMENT (1 << 26) -+ -+/** -+ * CUSE INIT request/reply flags -+ * -+ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl -+ */ -+#define CUSE_UNRESTRICTED_IOCTL (1 << 0) -+ -+/** -+ * Release flags -+ */ -+#define FUSE_RELEASE_FLUSH (1 << 0) -+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) -+ -+/** -+ * Getattr flags -+ */ -+#define FUSE_GETATTR_FH (1 << 0) -+ -+/** -+ * Lock flags -+ */ -+#define FUSE_LK_FLOCK (1 << 0) -+ -+/** -+ * WRITE flags -+ * -+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed -+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid -+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits -+ */ -+#define FUSE_WRITE_CACHE (1 << 0) -+#define FUSE_WRITE_LOCKOWNER (1 << 1) -+#define FUSE_WRITE_KILL_PRIV (1 << 2) -+ -+/** -+ * Read flags -+ */ -+#define FUSE_READ_LOCKOWNER (1 << 1) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_32BIT: 32bit ioctl -+ * FUSE_IOCTL_DIR: is a directory -+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_32BIT (1 << 3) -+#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT_X32 (1 << 5) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Poll flags -+ * -+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify -+ */ -+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) -+ -+/** -+ * Fsync flags -+ * -+ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata -+ */ -+#define FUSE_FSYNC_FDATASYNC (1 << 0) -+ -+enum fuse_opcode { -+ FUSE_LOOKUP = 1, -+ FUSE_FORGET = 2, /* no reply */ -+ FUSE_GETATTR = 3, -+ FUSE_SETATTR = 4, -+ FUSE_READLINK = 5, -+ FUSE_SYMLINK = 6, -+ FUSE_MKNOD = 8, -+ FUSE_MKDIR = 9, -+ FUSE_UNLINK = 10, -+ FUSE_RMDIR = 11, -+ FUSE_RENAME = 12, -+ FUSE_LINK = 13, -+ FUSE_OPEN = 14, -+ FUSE_READ = 15, -+ FUSE_WRITE = 16, -+ FUSE_STATFS = 17, -+ FUSE_RELEASE = 18, -+ FUSE_FSYNC = 20, -+ FUSE_SETXATTR = 21, -+ FUSE_GETXATTR = 22, -+ FUSE_LISTXATTR = 23, -+ FUSE_REMOVEXATTR = 24, -+ FUSE_FLUSH = 25, -+ FUSE_INIT = 26, -+ FUSE_OPENDIR = 27, -+ FUSE_READDIR = 28, -+ FUSE_RELEASEDIR = 29, -+ FUSE_FSYNCDIR = 30, -+ FUSE_GETLK = 31, -+ FUSE_SETLK = 32, -+ FUSE_SETLKW = 33, -+ FUSE_ACCESS = 34, -+ FUSE_CREATE = 35, -+ FUSE_INTERRUPT = 36, -+ FUSE_BMAP = 37, -+ FUSE_DESTROY = 38, -+ FUSE_IOCTL = 39, -+ FUSE_POLL = 40, -+ FUSE_NOTIFY_REPLY = 41, -+ FUSE_BATCH_FORGET = 42, -+ FUSE_FALLOCATE = 43, -+ FUSE_READDIRPLUS = 44, -+ FUSE_RENAME2 = 45, -+ FUSE_LSEEK = 46, -+ FUSE_COPY_FILE_RANGE = 47, -+ FUSE_SETUPMAPPING = 48, -+ FUSE_REMOVEMAPPING = 49, -+ -+ /* CUSE specific operations */ -+ CUSE_INIT = 4096, -+ -+ /* Reserved opcodes: helpful to detect structure endian-ness */ -+ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ -+ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ -+}; -+ -+enum fuse_notify_code { -+ FUSE_NOTIFY_POLL = 1, -+ 
FUSE_NOTIFY_INVAL_INODE = 2, -+ FUSE_NOTIFY_INVAL_ENTRY = 3, -+ FUSE_NOTIFY_STORE = 4, -+ FUSE_NOTIFY_RETRIEVE = 5, -+ FUSE_NOTIFY_DELETE = 6, -+ FUSE_NOTIFY_CODE_MAX, -+}; -+ -+/* The read buffer is required to be at least 8k, but may be much larger */ -+#define FUSE_MIN_READ_BUFFER 8192 -+ -+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 -+ -+struct fuse_entry_out { -+ uint64_t nodeid; /* Inode ID */ -+ uint64_t generation; /* Inode generation: nodeid:gen must -+ be unique for the fs's lifetime */ -+ uint64_t entry_valid; /* Cache timeout for the name */ -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t entry_valid_nsec; -+ uint32_t attr_valid_nsec; -+ struct fuse_attr attr; -+}; -+ -+struct fuse_forget_in { -+ uint64_t nlookup; -+}; -+ -+struct fuse_forget_one { -+ uint64_t nodeid; -+ uint64_t nlookup; -+}; -+ -+struct fuse_batch_forget_in { -+ uint32_t count; -+ uint32_t dummy; -+}; -+ -+struct fuse_getattr_in { -+ uint32_t getattr_flags; -+ uint32_t dummy; -+ uint64_t fh; -+}; -+ -+#define FUSE_COMPAT_ATTR_OUT_SIZE 96 -+ -+struct fuse_attr_out { -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t attr_valid_nsec; -+ uint32_t dummy; -+ struct fuse_attr attr; -+}; -+ -+#define FUSE_COMPAT_MKNOD_IN_SIZE 8 -+ -+struct fuse_mknod_in { -+ uint32_t mode; -+ uint32_t rdev; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_mkdir_in { -+ uint32_t mode; -+ uint32_t umask; -+}; -+ -+struct fuse_rename_in { -+ uint64_t newdir; -+}; -+ -+struct fuse_rename2_in { -+ uint64_t newdir; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_link_in { -+ uint64_t oldnodeid; -+}; -+ -+struct fuse_setattr_in { -+ uint32_t valid; -+ uint32_t padding; -+ uint64_t fh; -+ uint64_t size; -+ uint64_t lock_owner; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t unused4; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t unused5; 
-+}; -+ -+struct fuse_open_in { -+ uint32_t flags; -+ uint32_t unused; -+}; -+ -+struct fuse_create_in { -+ uint32_t flags; -+ uint32_t mode; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_open_out { -+ uint64_t fh; -+ uint32_t open_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_release_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t release_flags; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_flush_in { -+ uint64_t fh; -+ uint32_t unused; -+ uint32_t padding; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_read_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t read_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_WRITE_IN_SIZE 24 -+ -+struct fuse_write_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t write_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_write_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_STATFS_SIZE 48 -+ -+struct fuse_statfs_out { -+ struct fuse_kstatfs st; -+}; -+ -+struct fuse_fsync_in { -+ uint64_t fh; -+ uint32_t fsync_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_setxattr_in { -+ uint32_t size; -+ uint32_t flags; -+}; -+ -+struct fuse_getxattr_in { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_getxattr_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_in { -+ uint64_t fh; -+ uint64_t owner; -+ struct fuse_file_lock lk; -+ uint32_t lk_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_out { -+ struct fuse_file_lock lk; -+}; -+ -+struct fuse_access_in { -+ uint32_t mask; -+ uint32_t padding; -+}; -+ -+struct fuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+}; -+ -+#define FUSE_COMPAT_INIT_OUT_SIZE 8 -+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 -+ -+struct fuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+ uint16_t 
max_background; -+ uint16_t congestion_threshold; -+ uint32_t max_write; -+ uint32_t time_gran; -+ uint16_t max_pages; -+ uint16_t map_alignment; -+ uint32_t unused[8]; -+}; -+ -+#define CUSE_INIT_INFO_MAX 4096 -+ -+struct cuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+}; -+ -+struct cuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+ uint32_t max_read; -+ uint32_t max_write; -+ uint32_t dev_major; /* chardev major */ -+ uint32_t dev_minor; /* chardev minor */ -+ uint32_t spare[10]; -+}; -+ -+struct fuse_interrupt_in { -+ uint64_t unique; -+}; -+ -+struct fuse_bmap_in { -+ uint64_t block; -+ uint32_t blocksize; -+ uint32_t padding; -+}; -+ -+struct fuse_bmap_out { -+ uint64_t block; -+}; -+ -+struct fuse_ioctl_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t cmd; -+ uint64_t arg; -+ uint32_t in_size; -+ uint32_t out_size; -+}; -+ -+struct fuse_ioctl_iovec { -+ uint64_t base; -+ uint64_t len; -+}; -+ -+struct fuse_ioctl_out { -+ int32_t result; -+ uint32_t flags; -+ uint32_t in_iovs; -+ uint32_t out_iovs; -+}; -+ -+struct fuse_poll_in { -+ uint64_t fh; -+ uint64_t kh; -+ uint32_t flags; -+ uint32_t events; -+}; -+ -+struct fuse_poll_out { -+ uint32_t revents; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_poll_wakeup_out { -+ uint64_t kh; -+}; -+ -+struct fuse_fallocate_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint64_t length; -+ uint32_t mode; -+ uint32_t padding; -+}; -+ -+struct fuse_in_header { -+ uint32_t len; -+ uint32_t opcode; -+ uint64_t unique; -+ uint64_t nodeid; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t pid; -+ uint32_t padding; -+}; -+ -+struct fuse_out_header { -+ uint32_t len; -+ int32_t error; -+ uint64_t unique; -+}; -+ -+struct fuse_dirent { -+ uint64_t ino; -+ uint64_t off; -+ uint32_t namelen; -+ uint32_t type; -+ char name[]; -+}; -+ -+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -+#define FUSE_DIRENT_ALIGN(x) \ -+ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) -+#define FUSE_DIRENT_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) -+ -+struct fuse_direntplus { -+ struct fuse_entry_out entry_out; -+ struct fuse_dirent dirent; -+}; -+ -+#define FUSE_NAME_OFFSET_DIRENTPLUS \ -+ offsetof(struct fuse_direntplus, dirent.name) -+#define FUSE_DIRENTPLUS_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) -+ -+struct fuse_notify_inval_inode_out { -+ uint64_t ino; -+ int64_t off; -+ int64_t len; -+}; -+ -+struct fuse_notify_inval_entry_out { -+ uint64_t parent; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_delete_out { -+ uint64_t parent; -+ uint64_t child; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_store_out { -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_retrieve_out { -+ uint64_t notify_unique; -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+/* Matches the size of fuse_write_in */ -+struct fuse_notify_retrieve_in { -+ uint64_t dummy1; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t dummy2; -+ uint64_t dummy3; -+ uint64_t dummy4; -+}; -+ -+/* Device ioctls: */ -+#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) -+ -+struct fuse_lseek_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t whence; -+ uint32_t padding; -+}; -+ -+struct fuse_lseek_out { -+ uint64_t offset; -+}; -+ -+struct fuse_copy_file_range_in { -+ uint64_t fh_in; -+ uint64_t off_in; -+ uint64_t nodeid_out; -+ uint64_t fh_out; -+ uint64_t off_out; -+ uint64_t len; -+ uint64_t flags; -+}; -+ -+#endif /* _LINUX_FUSE_H */ -diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh -index f76d773..29c27f4 100755 ---- a/scripts/update-linux-headers.sh -+++ b/scripts/update-linux-headers.sh -@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" - mkdir -p 
"$output/include/standard-headers/linux" - for i in "$tmpdir"/include/linux/*virtio*.h \ - "$tmpdir/include/linux/qemu_fw_cfg.h" \ -+ "$tmpdir/include/linux/fuse.h" \ - "$tmpdir/include/linux/input.h" \ - "$tmpdir/include/linux/input-event-codes.h" \ - "$tmpdir/include/linux/pci_regs.h" \ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pull-in-upstream-headers.patch b/kvm-virtiofsd-Pull-in-upstream-headers.patch deleted file mode 100644 index 78784fb..0000000 --- a/kvm-virtiofsd-Pull-in-upstream-headers.patch +++ /dev/null @@ -1,4911 +0,0 @@ -From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:39 +0100 -Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-5-dgilbert@redhat.com> -Patchwork-id: 93457 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pull in headers fromlibfuse's upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 823 +++++++++++++ - tools/virtiofsd/fuse_i.h | 139 +++ - tools/virtiofsd/fuse_log.h | 82 ++ - tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 59 + - tools/virtiofsd/fuse_opt.h | 271 +++++ - tools/virtiofsd/passthrough_helpers.h | 76 ++ - 8 files changed, 4814 insertions(+) - create mode 100644 tools/virtiofsd/fuse.h - create mode 100644 tools/virtiofsd/fuse_common.h - create mode 100644 tools/virtiofsd/fuse_i.h - create mode 100644 tools/virtiofsd/fuse_log.h - create mode 100644 tools/virtiofsd/fuse_lowlevel.h - create mode 100644 tools/virtiofsd/fuse_misc.h - create mode 100644 tools/virtiofsd/fuse_opt.h - create mode 100644 tools/virtiofsd/passthrough_helpers.h - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -new file mode 100644 -index 0000000..883f6e5 ---- /dev/null -+++ b/tools/virtiofsd/fuse.h -@@ -0,0 +1,1275 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_H_ -+#define FUSE_H_ -+ -+/** @file -+ * -+ * This file defines the library interface of FUSE -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. -+ */ -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Basic FUSE API * -+ * ----------------------------------------------------------- */ -+ -+/** Handle for a FUSE filesystem */ -+struct fuse; -+ -+/** -+ * Readdir flags, passed to ->readdir() -+ */ -+enum fuse_readdir_flags { -+ /** -+ * "Plus" mode. 
-+ * -+ * The kernel wants to prefill the inode cache during readdir. The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), -+}; -+ -+enum fuse_fill_dir_flags { -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. -+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), -+}; -+ -+/** Function to add an entry in a readdir() operation -+ * -+ * The *off* parameter can be any non-zero value that enables the -+ * filesystem to identify the current point in the directory -+ * stream. It does not need to be the actual physical position. A -+ * value of zero is reserved to indicate that seeking in directories -+ * is not supported. -+ * -+ * @param buf the buffer passed to the readdir() operation -+ * @param name the file name of the directory entry -+ * @param stat file attributes, can be NULL -+ * @param off offset of the next entry or zero -+ * @param flags fill flags -+ * @return 1 if buffer is full, zero otherwise -+ */ -+typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); -+/** -+ * Configuration of the high-level API -+ * -+ * This structure is initialized from the arguments passed to -+ * fuse_new(), and then passed to the file system's init() handler -+ * which should ensure that the configuration is compatible with the -+ * file system implementation. -+ */ -+struct fuse_config { -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. 
-+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. 
This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). 
-+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. 
-+ */ -+ int show_help; -+ char *modules; -+ int debug; -+}; -+ -+ -+/** -+ * The file system operations: -+ * -+ * Most of these should work very similarly to the well known UNIX -+ * file system operations. A major exception is that instead of -+ * returning an error in 'errno', the operation should return the -+ * negated error value (-errno) directly. -+ * -+ * All methods are optional, but some are essential for a useful -+ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, -+ * releasedir, fsyncdir, access, create, truncate, lock, init and -+ * destroy are special purpose methods, without which a full featured -+ * filesystem can still be implemented. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * Almost all operations take a path which can be of any length. -+ */ -+struct fuse_operations { -+ /** Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. 
-+ */ -+ int (*readlink) (const char *, char *, size_t); -+ -+ /** Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod) (const char *, mode_t, dev_t); -+ -+ /** Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ * */ -+ int (*mkdir) (const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink) (const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir) (const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink) (const char *, const char *); -+ -+ /** Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename) (const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link) (const char *, const char *); -+ -+ /** Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ */ -+ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -+ -+ /** Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. 
-+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open) (const char *, struct fuse_file_info *); -+ -+ /** Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read) (const char *, char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write) (const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs) (const char *, struct statvfs *); -+ -+ /** Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. 
Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush) (const char *, struct fuse_file_info *); -+ -+ /** Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync) (const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr) (const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr) (const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr) (const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr) (const char *, const char *); -+ -+ /** Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir) (const char *, struct fuse_file_info *); -+ -+ /** Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** Release directory -+ */ -+ int (*releasedir) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir) (const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init) (struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy) (void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access) (const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create) (const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock) (const char *, struct fuse_file_info *, int cmd, -+ struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens) (const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl) (const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll) (const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /** Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /** Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf) (const char *, struct fuse_bufvec **bufp, -+ size_t size, off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock) (const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate) (const char *, int, off_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range) (const char *path_in, -+ struct fuse_file_info *fi_in, -+ off_t offset_in, const char *path_out, -+ struct fuse_file_info *fi_out, -+ off_t offset_out, size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+}; -+ -+/** Extra context that may be needed by some filesystems -+ * -+ * The uid, gid and pid fields are not filled in case of a writepage -+ * operation. -+ */ -+struct fuse_context { -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; -+ -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Process ID of the calling thread */ -+ pid_t pid; -+ -+ /** Private filesystem data */ -+ void *private_data; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+/** -+ * Main function of FUSE. -+ * -+ * This is for the lazy. This is all that has to be called from the -+ * main() function. -+ * -+ * This function does the following: -+ * - parses command line options, and handles --help and -+ * --version -+ * - installs signal handlers for INT, HUP, TERM and PIPE -+ * - registers an exit handler to unmount the filesystem on program exit -+ * - creates a fuse handle -+ * - registers the operations -+ * - calls either the single-threaded or the multi-threaded event loop -+ * -+ * Most file systems will have to parse some file-system specific -+ * arguments before calling this function. It is recommended to do -+ * this with fuse_opt_parse() and a processing function that passes -+ * through any unknown options (this can also be achieved by just -+ * passing NULL as the processing function). That way, the remaining -+ * options can be passed directly to fuse_main(). -+ * -+ * fuse_main() accepts all options that can be passed to -+ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help -+ * output. argv[0] may also be set to the empty string. In this case -+ * the usage message is suppressed. This can be used by file systems -+ * to print their own usage line first. See hello.c for an example of -+ * how to do this. -+ * -+ * Note: this is currently implemented as a macro. -+ * -+ * The following error codes may be returned from fuse_main(): -+ * 1: Invalid option arguments -+ * 2: No mount point specified -+ * 3: FUSE setup failed -+ * 4: Mounting failed -+ * 5: Failed to daemonize (detach from session) -+ * 6: Failed to set up signal handlers -+ * 7: An error occured during the life of the file system -+ * -+ * @param argc the argument counter passed to the main() function -+ * @param argv the argument vector passed to the main() function -+ * @param op the file system operation -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return 0 on success, nonzero on failure -+ * -+ * Example usage, see hello.c -+ */ -+/* -+ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ void *private_data); -+*/ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ -+/* ----------------------------------------------------------- * -+ * More detailed API * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print available options (high- and low-level) to stdout. This is -+ * not an exhaustive list, but includes only those options that may be -+ * of interest to an end-user of a file system. 
-+ * -+ * The function looks at the argument vector only to determine if -+ * there are additional modules to be loaded (module=foo option), -+ * and attempts to call their help functions as well. -+ * -+ * @param args the argument vector. -+ */ -+void fuse_lib_help(struct fuse_args *args); -+ -+/** -+ * Create a new FUSE filesystem. -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the -+ * FUSE-specific mount options from mount.fuse(8). -+ * -+ * If the --help option is specified, the function writes a help text -+ * to stdout and returns NULL. -+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help output. If -+ * argv[0] is set to the empty string, no usage message is included in -+ * the --help output. -+ * -+ * If an unknown option is passed in, an error message is written to -+ * stderr and the function returns NULL. -+ * -+ * @param args argument vector -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return the created FUSE handle -+ */ -+#if FUSE_USE_VERSION == 30 -+struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) -+#else -+struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#endif -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param f the FUSE handle -+ * -+ * @return 0 on success, -1 on failure. 
-+ **/ -+int fuse_mount(struct fuse *f, const char *mountpoint); -+ -+/** -+ * Unmount a FUSE file system. -+ * -+ * See fuse_session_unmount() for additional information. -+ * -+ * @param f the FUSE handle -+ **/ -+void fuse_unmount(struct fuse *f); -+ -+/** -+ * Destroy the FUSE handle. -+ * -+ * NOTE: This function does not unmount the filesystem. If this is -+ * needed, call fuse_unmount() before calling this function. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_destroy(struct fuse *f); -+ -+/** -+ * FUSE event loop. -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param f the FUSE handle -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop_mt() -+ */ -+int fuse_loop(struct fuse *f); -+ -+/** -+ * Flag session as terminated -+ * -+ * This function will cause any running event loops to exit on -+ * the next opportunity. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_exit(struct fuse *f); -+ -+/** -+ * FUSE event loop with multiple threads -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. Request are processed in parallel by -+ * distributing them between multiple threads. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -+ * single-threaded mode, and that you will not have to worry about reentrancy, -+ * though you will have to worry about recursive lookups. In single-threaded -+ * mode, FUSE will wait for one callback to return before calling another. 
-+ * -+ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -+ * multiple simultaneous calls into the various callback functions given by your -+ * fuse_operations record. -+ * -+ * If you are using multiple threads, you can enjoy all the parallel execution -+ * and interactive response benefits of threads, and you get to enjoy all the -+ * benefits of race conditions and locking bugs, too. Ensure that any code used -+ * in the callback function of fuse_operations is also thread-safe. -+ * -+ * @param f the FUSE handle -+ * @param config loop configuration -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_loop_mt_31(struct fuse *f, int clone_fd); -+#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) -+#else -+int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Get the current context -+ * -+ * The context is only valid for the duration of a filesystem -+ * operation, and thus must not be stored and used later. -+ * -+ * @return the context -+ */ -+struct fuse_context *fuse_get_context(void); -+ -+/** -+ * Get the current supplementary group IDs for the current request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_getgroups(int size, gid_t list[]); -+ -+/** -+ * Check if the current request has already been interrupted -+ * -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_interrupted(void); -+ -+/** -+ * Invalidates cache for the given path. -+ * -+ * This calls fuse_lowlevel_notify_inval_inode internally. -+ * -+ * @return 0 on successful invalidation, negative error value otherwise. -+ * This routine may return -ENOENT to indicate that there was -+ * no entry to be invalidated, e.g., because the path has not -+ * been seen before or has been forgotten; this should not be -+ * considered to be an error. -+ */ -+int fuse_invalidate_path(struct fuse *f, const char *path); -+ -+/** -+ * The real main function -+ * -+ * Do not call this directly, use fuse_main() -+ */ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+ -+/** -+ * Start the cleanup thread when using option "remember". -+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ * @return 0 on success and -1 on error -+ */ -+int fuse_start_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Stop the cleanup thread when using option "remember". 
-+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ */ -+void fuse_stop_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Iterate over cache removing stale entries -+ * use in conjunction with "-oremember" -+ * -+ * NOTE: This is already done for the standard sessions -+ * -+ * @param fuse struct fuse pointer for fuse instance -+ * @return the number of seconds until the next cleanup -+ */ -+int fuse_clean_cache(struct fuse *fuse); -+ -+/* -+ * Stacking API -+ */ -+ -+/** -+ * Fuse filesystem object -+ * -+ * This is opaque object represents a filesystem layer -+ */ -+struct fuse_fs; -+ -+/* -+ * These functions call the relevant filesystem operation, and return -+ * the result. -+ * -+ * If the operation is not defined, they return -ENOSYS, with the -+ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, -+ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. -+ */ -+ -+int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -+ const char *newpath, unsigned int flags); -+int fuse_fs_unlink(struct fuse_fs *fs, const char *path); -+int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -+ const char *path); -+int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_open(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -+ size_t size, 
off_t off, struct fuse_file_info *fi); -+int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_flush(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); -+int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_lock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int cmd, struct flock *lock); -+int fuse_fs_flock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int op); -+int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -+ struct fuse_file_info *fi); -+int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -+ struct fuse_file_info *fi); -+int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -+ const struct timespec tv[2], struct fuse_file_info *fi); -+int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); -+int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, -+ size_t len); -+int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -+ dev_t rdev); -+int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); -+int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -+ const char *value, size_t size, int flags); -+int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -+ char *value, size_t size); -+int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -+ const char *name); -+int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -+ uint64_t *idx); -+int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); -+int fuse_fs_poll(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); -+int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); -+off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, -+ struct fuse_file_info *fi); -+void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+void fuse_fs_destroy(struct fuse_fs *fs); -+ -+int fuse_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Create a new fuse filesystem object -+ * -+ * This is usually called from the factory of a fuse module to create -+ * a new instance of a filesystem. -+ * -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. 
-+ * @return a new filesystem object -+ */ -+struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -+ void *private_data); -+ -+/** -+ * Factory for creating filesystem objects -+ * -+ * The function may use and remove options from 'args' that belong -+ * to this module. -+ * -+ * For now the 'fs' vector always contains exactly one filesystem. -+ * This is the filesystem which will be below the newly created -+ * filesystem in the stack. -+ * -+ * @param args the command line arguments -+ * @param fs NULL terminated filesystem object vector -+ * @return the new filesystem object -+ */ -+typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -+ struct fuse_fs *fs[]); -+/** -+ * Register filesystem module -+ * -+ * If the "-omodules=*name*_:..." option is present, filesystem -+ * objects are created and pushed onto the stack with the *factory_* -+ * function. -+ * -+ * @param name_ the name of this filesystem module -+ * @param factory_ the factory function for this filesystem module -+ */ -+#define FUSE_REGISTER_MODULE(name_, factory_) \ -+ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ -+/** Get session from fuse object */ -+struct fuse_session *fuse_get_session(struct fuse *f); -+ -+/** -+ * Open a FUSE file descriptor and set up the mount for the given -+ * mountpoint and flags. 
-+ * -+ * @param mountpoint reference to the mount in the file system -+ * @param options mount options -+ * @return the FUSE file descriptor or -1 upon error -+ */ -+int fuse_open_channel(const char *mountpoint, const char *options); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -new file mode 100644 -index 0000000..2d686b2 ---- /dev/null -+++ b/tools/virtiofsd/fuse_common.h -@@ -0,0 +1,823 @@ -+/* FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+/** @file */ -+ -+#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) -+#error "Never include directly; use or instead." -+#endif -+ -+#ifndef FUSE_COMMON_H_ -+#define FUSE_COMMON_H_ -+ -+#include "fuse_opt.h" -+#include "fuse_log.h" -+#include -+#include -+ -+/** Major version of FUSE library interface */ -+#define FUSE_MAJOR_VERSION 3 -+ -+/** Minor version of FUSE library interface */ -+#define FUSE_MINOR_VERSION 2 -+ -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Information about an open file. -+ * -+ * File Handles are created by the open, opendir, and create methods and closed -+ * by the release and releasedir methods. Multiple file handles may be -+ * concurrently open for the same file. Generally, a client will create one -+ * file handle per file descriptor, though in some cases multiple file -+ * descriptors can share a single file handle. -+ */ -+struct fuse_file_info { -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /** In case of a write operation indicates if this was caused -+ by a delayed write from the page cache. 
If so, then the -+ context's pid, uid, and gid fields will not be valid, and -+ the *fh* value may not match the *fh* value that would -+ have been sent with the corresponding individual write -+ requests if write caching had been disabled. */ -+ unsigned int writepage : 1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io : 1; -+ -+ /** Can be filled in by open. It signals the kernel that any -+ currently cached file data (ie., data that the filesystem -+ provided the last time the file was open) need not be -+ invalidated. Has no effect when set in other contexts (in -+ particular it does nothing when set by opendir()). */ -+ unsigned int keep_cache : 1; -+ -+ /** Indicates a flush operation. Set in flush operation, also -+ maybe set in highlevel lock operation and lowlevel release -+ operation. */ -+ unsigned int flush : 1; -+ -+ /** Can be filled in by open, to indicate that the file is not -+ seekable. */ -+ unsigned int nonseekable : 1; -+ -+ /* Indicates that flock locks for this file should be -+ released. If set, lock_owner shall contain a valid value. -+ May only be set in ->release(). */ -+ unsigned int flock_release : 1; -+ -+ /** Can be filled in by opendir. It signals the kernel to -+ enable caching of entries returned by readdir(). Has no -+ effect when set in other contexts (in particular it does -+ nothing when set by open()). */ -+ unsigned int cache_readdir : 1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding : 25; -+ unsigned int padding2 : 32; -+ -+ /** File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /** Requested poll events. Available in ->poll. Only set on kernels -+ which support it. If unsupported, this field is set to zero. 
*/ -+ uint32_t poll_events; -+}; -+ -+/** -+ * Configuration parameters passed to fuse_session_loop_mt() and -+ * fuse_loop_mt(). -+ */ -+struct fuse_loop_config { -+ /** -+ * whether to use separate device fds for each thread -+ * (may increase performance) -+ */ -+ int clone_fd; -+ -+ /** -+ * The maximum number of available worker threads before they -+ * start to get deleted when they become idle. If not -+ * specified, the default is 10. -+ * -+ * Adjusting this has performance implications; a very small number -+ * of threads in the pool will cause a lot of thread creation and -+ * deletion overhead and performance may suffer. When set to 0, a new -+ * thread will be created to service every operation. -+ */ -+ unsigned int max_idle_threads; -+}; -+ -+/************************************************************************** -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -+ **************************************************************************/ -+ -+/** -+ * Indicates that the filesystem supports asynchronous read requests. -+ * -+ * If this capability is not requested/available, the kernel will -+ * ensure that there is at most one pending read request per -+ * file-handle at any time, and will attempt to order read requests by -+ * increasing offset. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_READ (1 << 0) -+ -+/** -+ * Indicates that the filesystem supports "remote" locking. -+ * -+ * This feature is enabled by default when supported by the kernel, -+ * and if getlk() and setlk() handlers are implemented. -+ */ -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+ -+/** -+ * Indicates that the filesystem supports the O_TRUNC open flag. If -+ * disabled, and an application specifies O_TRUNC, fuse first calls -+ * truncate() and then open() with O_TRUNC filtered out. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+ -+/** -+ * Indicates that the filesystem supports lookups of "." and "..". -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+ -+/** -+ * Indicates that the kernel should not apply the umask to the -+ * file mode on create operations. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_DONT_MASK (1 << 6) -+ -+/** -+ * Indicates that libfuse should try to use splice() when writing to -+ * the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+ -+/** -+ * Indicates that libfuse should try to move pages instead of copying when -+ * writing to / reading from the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+ -+/** -+ * Indicates that libfuse should try to use splice() when reading from -+ * the fuse device. This may improve performance. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a write_buf() handler. -+ */ -+#define FUSE_CAP_SPLICE_READ (1 << 9) -+ -+/** -+ * If set, the calls to flock(2) will be emulated using POSIX locks and must -+ * then be handled by the filesystem's setlock() handler. -+ * -+ * If not set, flock(2) calls will be handled by the FUSE kernel module -+ * internally (so any access that does not go through the kernel cannot be taken -+ * into account). -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a flock() handler. -+ */ -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+ -+/** -+ * Indicates that the filesystem supports ioctl's on directories. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_IOCTL_DIR (1 << 11) -+ -+/** -+ * Traditionally, while a file is open the FUSE kernel module only -+ * asks the filesystem for an update of the file's attributes when a -+ * client attempts to read beyond EOF. This is unsuitable for -+ * e.g. network filesystems, where the file contents may change -+ * without the kernel knowing about it. -+ * -+ * If this flag is set, FUSE will check the validity of the attributes -+ * on every read. If the attributes are no longer valid (i.e., if the -+ * *attr_timeout* passed to fuse_reply_attr() or set in `struct -+ * fuse_entry_param` has passed), it will first issue a `getattr` -+ * request. If the new mtime differs from the previous value, any -+ * cached file *contents* will be invalidated as well. -+ * -+ * This flag should always be set when available. If all file changes -+ * go through the kernel, *attr_timeout* should be set to a very large -+ * number to avoid unnecessary getattr() calls. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+ -+/** -+ * Indicates that the filesystem supports readdirplus. -+ * -+ * This feature is enabled by default when supported by the kernel and if the -+ * filesystem implements a readdirplus() handler. -+ */ -+#define FUSE_CAP_READDIRPLUS (1 << 13) -+ -+/** -+ * Indicates that the filesystem supports adaptive readdirplus. -+ * -+ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel -+ * will always issue readdirplus() requests to retrieve directory -+ * contents. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel -+ * will issue both readdir() and readdirplus() requests, depending on -+ * how much information is expected to be required. 
-+ * -+ * As of Linux 4.20, the algorithm is as follows: when userspace -+ * starts to read directory entries, issue a READDIRPLUS request to -+ * the filesystem. If any entry attributes have been looked up by the -+ * time userspace requests the next batch of entries continue with -+ * READDIRPLUS, otherwise switch to plain READDIR. This will reasult -+ * in eg plain "ls" triggering READDIRPLUS first then READDIR after -+ * that because it doesn't do lookups. "ls -l" should result in all -+ * READDIRPLUS, except if dentries are already cached. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements both a readdirplus() and a readdir() -+ * handler. -+ */ -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+ -+/** -+ * Indicates that the filesystem supports asynchronous direct I/O submission. -+ * -+ * If this capability is not requested/available, the kernel will ensure that -+ * there is at most one pending read and one pending write request per direct -+ * I/O file-handle at any time. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_DIO (1 << 15) -+ -+/** -+ * Indicates that writeback caching should be enabled. This means that -+ * individual write request may be buffered and merged in the kernel -+ * before they are send to the filesystem. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+ -+/** -+ * Indicates support for zero-message opens. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the -+ * filesystem may return `ENOSYS` from the open() handler to indicate -+ * success. Further attempts to open files will be handled in the -+ * kernel. (If this flag is not set, returning ENOSYS will be treated -+ * as an error and signaled to the caller). -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no -+ * effect*. 
-+ */ -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+ -+/** -+ * Indicates support for parallel directory operations. If this flag -+ * is unset, the FUSE kernel module will ensure that lookup() and -+ * readdir() requests are never issued concurrently for the same -+ * directory. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+ -+/** -+ * Indicates support for POSIX ACLs. -+ * -+ * If this feature is enabled, the kernel will cache and have -+ * responsibility for enforcing ACLs. ACL will be stored as xattrs and -+ * passed to userspace, which is responsible for updating the ACLs in -+ * the filesystem, keeping the file mode in sync with the ACL, and -+ * ensuring inheritance of default ACLs when new filesystem nodes are -+ * created. Note that this requires that the file system is able to -+ * parse and interpret the xattr representation of ACLs. -+ * -+ * Enabling this feature implicitly turns on the -+ * ``default_permissions`` mount option (even if it was not passed to -+ * mount(2)). -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_POSIX_ACL (1 << 19) -+ -+/** -+ * Indicates that the filesystem is responsible for unsetting -+ * setuid and setgid bits when a file is written, truncated, or -+ * its owner is changed. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+ -+/** -+ * Indicates support for zero-message opendirs. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the filesystem -+ * may return `ENOSYS` from the opendir() handler to indicate success. Further -+ * opendir and releasedir messages will be handled in the kernel. (If this -+ * flag is not set, returning ENOSYS will be treated as an error and signalled -+ * to the caller.) -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
-+ */ -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_DIR: is a directory -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Connection information, passed to the ->init() method -+ * -+ * Some of the elements are read-write, these can be changed to -+ * indicate the value requested by the filesystem. The requested -+ * value must usually be smaller than the indicated value. -+ */ -+struct fuse_conn_info { -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. 
-+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; -+}; -+ -+struct fuse_session; -+struct fuse_pollhandle; -+struct fuse_conn_info_opts; -+ -+/** -+ * This function parses several command-line options that can be used -+ * to override elements of struct fuse_conn_info. The pointer returned -+ * by this function should be passed to the -+ * fuse_apply_conn_info_opts() method by the file system's init() -+ * handler. -+ * -+ * Before using this function, think twice if you really want these -+ * parameters to be adjustable from the command line. In most cases, -+ * they should be determined by the file system internally. 
-+ * -+ * The following options are recognized: -+ * -+ * -o max_write=N sets conn->max_write -+ * -o max_readahead=N sets conn->max_readahead -+ * -o max_background=N sets conn->max_background -+ * -o congestion_threshold=N sets conn->congestion_threshold -+ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want -+ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want -+ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -+ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -+ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -+ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -+ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -+ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -+ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -+ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -+ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -+ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -+ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -+ * -o time_gran=N sets conn->time_gran -+ * -+ * Known options will be removed from *args*, unknown options will be -+ * passed through unchanged. -+ * -+ * @param args argument vector (input+output) -+ * @return parsed options -+ **/ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+ -+/** -+ * This function applies the (parsed) parameters in *opts* to the -+ * *conn* pointer. It may modify the following fields: wants, -+ * max_write, max_readahead, congestion_threshold, max_background, -+ * time_gran. A field is only set (or unset) if the corresponding -+ * option has been explicitly set. 
-+ */ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn); -+ -+/** -+ * Go into the background -+ * -+ * @param foreground if true, stay in the foreground -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_daemonize(int foreground); -+ -+/** -+ * Get the version of the library -+ * -+ * @return the version -+ */ -+int fuse_version(void); -+ -+/** -+ * Get the full package version string of the library -+ * -+ * @return the package version -+ */ -+const char *fuse_pkgversion(void); -+ -+/** -+ * Destroy poll handle -+ * -+ * @param ph the poll handle -+ */ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); -+ -+/* ----------------------------------------------------------- * -+ * Data buffer * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Buffer flags -+ */ -+enum fuse_buf_flags { -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. -+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), -+}; -+ -+/** -+ * Buffer copy flags -+ */ -+enum fuse_buf_copy_flags { -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. 
-+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. -+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+}; -+ -+/** -+ * Single data buffer -+ * -+ * Generic data buffer for I/O, extended attributes, etc... Data may -+ * be supplied as a memory pointer or as a file descriptor -+ */ -+struct fuse_buf { -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; -+}; -+ -+/** -+ * Data buffer vector -+ * -+ * An array of data buffers, each containing a memory pointer or a -+ * file descriptor. -+ * -+ * Allocate dynamically to add more than one buffer. 
-+ */ -+struct fuse_bufvec { -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; -+}; -+ -+/* Initialize bufvec with a single buffer of given size */ -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec) { \ -+ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, \ -+ /* .buf = */ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags) 0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } \ -+ } ) -+ -+/** -+ * Get total size of data in a fuse buffer vector -+ * -+ * @param bufv buffer vector -+ * @return size of data -+ */ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv); -+ -+/** -+ * Copy data from one buffer vector to another -+ * -+ * @param dst destination buffer vector -+ * @param src source buffer vector -+ * @param flags flags controlling the copy -+ * @return actual number of bytes copied or -errno on error -+ */ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -+ enum fuse_buf_copy_flags flags); -+ -+/* ----------------------------------------------------------- * -+ * Signal handling * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Exit session on HUP, TERM and INT signals and ignore PIPE signal -+ * -+ * Stores session in a global variable. May only be called once per -+ * process until fuse_remove_signal_handlers() is called. -+ * -+ * Once either of the POSIX signals arrives, the signal handler calls -+ * fuse_session_exit(). 
-+ * -+ * @param se the session to exit -+ * @return 0 on success, -1 on failure -+ * -+ * See also: -+ * fuse_remove_signal_handlers() -+ */ -+int fuse_set_signal_handlers(struct fuse_session *se); -+ -+/** -+ * Restore default signal handlers -+ * -+ * Resets global session. After this fuse_set_signal_handlers() may -+ * be called again. -+ * -+ * @param se the same session as given in fuse_set_signal_handlers() -+ * -+ * See also: -+ * fuse_set_signal_handlers() -+ */ -+void fuse_remove_signal_handlers(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Compatibility stuff * -+ * ----------------------------------------------------------- */ -+ -+#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 -+# error only API version 30 or greater is supported -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+ -+/* -+ * This interface uses 64 bit off_t. -+ * -+ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! -+ */ -+ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); -+#else -+struct _fuse_off_t_must_be_64bit_dummy_struct \ -+ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+#endif -+ -+#endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -new file mode 100644 -index 0000000..d38b630 ---- /dev/null -+++ b/tools/virtiofsd/fuse_i.h -@@ -0,0 +1,139 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "fuse.h" -+#include "fuse_lowlevel.h" -+ -+struct mount_opts; -+ -+struct fuse_req { -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit : 1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; -+}; -+ -+struct fuse_notify_req { -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; -+}; -+ -+struct fuse_session { -+ char *mountpoint; -+ volatile int exited; -+ int fd; -+ struct mount_opts *mo; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ pthread_key_t pipe_key; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; -+}; -+ -+struct fuse_chan { -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; -+}; -+ -+/** -+ * Filesystem module -+ * -+ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() -+ * macro. 
-+ * -+ */ -+struct fuse_module { -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; -+}; -+ -+/* ----------------------------------------------------------- * -+ * Channel interface (when using -o clone_fd) * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Obtain counted reference to the channel -+ * -+ * @param ch the channel -+ * @return the channel -+ */ -+struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -+ -+/** -+ * Drop counted reference to a channel -+ * -+ * @param ch the channel -+ */ -+void fuse_chan_put(struct fuse_chan *ch); -+ -+struct mount_opts *parse_mount_opts(struct fuse_args *args); -+void destroy_mount_opts(struct mount_opts *mo); -+void fuse_mount_version(void); -+unsigned get_max_read(struct mount_opts *o); -+void fuse_kern_unmount(const char *mountpoint, int fd); -+int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count); -+void fuse_free_req(fuse_req_t req); -+ -+void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -+ -+int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch); -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch); -+ -+struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); -+int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); -+ -+#define FUSE_MAX_MAX_PAGES 256 -+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 -+ -+/* room needed in buffer to accommodate header */ -+#define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -new file mode 100644 -index 0000000..5e112e0 ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.h -@@ -0,0 +1,82 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOG_H_ -+#define FUSE_LOG_H_ -+ -+/** @file -+ * -+ * This file defines the logging interface of FUSE -+ */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Log severity level -+ * -+ * These levels correspond to syslog(2) log levels since they are widely used. -+ */ -+enum fuse_log_level { -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG -+}; -+ -+/** -+ * Log message handler function. -+ * -+ * This function must be thread-safe. It may be called from any libfuse -+ * function, including fuse_parse_cmdline() and other functions invoked before -+ * a FUSE filesystem is created. -+ * -+ * Install a custom log message handler function using fuse_set_log_func(). -+ * -+ * @param level log severity level -+ * @param fmt sprintf-style format string including newline -+ * @param ap format string arguments -+ */ -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, -+ const char *fmt, va_list ap); -+ -+/** -+ * Install a custom log handler function. -+ * -+ * Log messages are emitted by libfuse functions to report errors and debug -+ * information. Messages are printed to stderr by default but this can be -+ * overridden by installing a custom log message handler function. -+ * -+ * The log message handler function is global and affects all FUSE filesystems -+ * created within this process. 
-+ * -+ * @param func a custom log message handler function or NULL to revert to -+ * the default -+ */ -+void fuse_set_log_func(fuse_log_func_t func); -+ -+/** -+ * Emit a log message -+ * -+ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) -+ * @param fmt sprintf-style format string including newline -+ */ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -new file mode 100644 -index 0000000..18c6363 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -0,0 +1,2089 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOWLEVEL_H_ -+#define FUSE_LOWLEVEL_H_ -+ -+/** @file -+ * -+ * Low level API -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this -+ * header. To use the newest API define it to 31 (recommended for any -+ * new application). 
-+ */ -+ -+#ifndef FUSE_USE_VERSION -+#error FUSE_USE_VERSION not defined -+#endif -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Miscellaneous definitions * -+ * ----------------------------------------------------------- */ -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/** Inode number type */ -+typedef uint64_t fuse_ino_t; -+ -+/** Request pointer type */ -+typedef struct fuse_req *fuse_req_t; -+ -+/** -+ * Session -+ * -+ * This provides hooks for processing requests, and exiting -+ */ -+struct fuse_session; -+ -+/** Directory entry parameters supplied to fuse_reply_entry() */ -+struct fuse_entry_param { -+ /** Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** Validity timeout (in seconds) for inode attributes. If -+ attributes only change as a result of requests that come -+ through the kernel, this should be set to a very large -+ value. 
*/ -+ double attr_timeout; -+ -+ /** Validity timeout (in seconds) for the name. If directory -+ entries are changed/deleted only as a result of requests -+ that come through the kernel, this should be set to a very -+ large value. */ -+ double entry_timeout; -+}; -+ -+/** -+ * Additional context associated with requests. -+ * -+ * Note that the reported client uid, gid and pid may be zero in some -+ * situations. For example, if the FUSE file system is running in a -+ * PID or user namespace but then accessed from outside the namespace, -+ * there is no valid uid/pid/gid that could be reported. -+ */ -+struct fuse_ctx { -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Thread ID of the calling process */ -+ pid_t pid; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+struct fuse_forget_data { -+ fuse_ino_t ino; -+ uint64_t nlookup; -+}; -+ -+/* 'to_set' flags in setattr */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* ----------------------------------------------------------- * -+ * Request methods and replies * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Low level filesystem operations -+ * -+ * Most of the methods (with the exception of init and destroy) -+ * receive a request handle (fuse_req_t) as their first argument. -+ * This handle must be passed to one of the specified reply functions. -+ * -+ * This may be done inside the method invocation, or after the call -+ * has returned. The request handle is valid until one of the reply -+ * functions is called. 
-+ * -+ * Other pointer arguments (name, fuse_file_info, etc) are not valid -+ * after the call has returned, so if they are needed later, their -+ * contents have to be copied. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_session_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * The filesystem sometimes needs to handle a return value of -ENOENT -+ * from the reply function, which means, that the request was -+ * interrupted, and the reply discarded. For example if -+ * fuse_reply_open() return -ENOENT means, that the release method for -+ * this file will not be called. -+ */ -+struct fuse_lowlevel_ops { -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init) (void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy) (void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. 
-+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. 
both must -+ * exist and neither may be deleted. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -+ size_t size, off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that if will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes. 
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, they filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned flags, -+ const void *in_buf, size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to pull() will succeed the same way without being send -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *bufv, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi) (fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number or the source file -+ * @param off_in starting point from were the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number or the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -+ off_t off_in, struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); -+}; -+ -+/** -+ * Reply with an error code or success. 
-+ * -+ * Possible requests: -+ * all except forget -+ * -+ * Whereever possible, error codes should be chosen from the list of -+ * documented error conditions in the corresponding system calls -+ * manpage. -+ * -+ * An error code of ENOSYS is sometimes treated specially. This is -+ * indicated in the documentation of the affected handler functions. -+ * -+ * The following requests may be answered with a zero error code: -+ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, -+ * removexattr, setlk. -+ * -+ * @param req request handle -+ * @param err the positive error value, or zero for success -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_err(fuse_req_t req, int err); -+ -+/** -+ * Don't send reply -+ * -+ * Possible requests: -+ * forget -+ * forget_multi -+ * retrieve_reply -+ * -+ * @param req request handle -+ */ -+void fuse_reply_none(fuse_req_t req); -+ -+/** -+ * Reply with a directory entry -+ * -+ * Possible requests: -+ * lookup, mknod, mkdir, symlink, link -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); -+ -+/** -+ * Reply with a directory entry and open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * create -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *fi); -+ -+/** -+ * Reply with attributes -+ * -+ * Possible requests: -+ * getattr, setattr -+ * -+ * @param req request handle -+ 
* @param attr the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout); -+ -+/** -+ * Reply with the contents of a symbolic link -+ * -+ * Possible requests: -+ * readlink -+ * -+ * @param req request handle -+ * @param link symbolic link contents -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_readlink(fuse_req_t req, const char *link); -+ -+/** -+ * Reply with open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * open, opendir -+ * -+ * @param req request handle -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); -+ -+/** -+ * Reply with number of bytes written -+ * -+ * Possible requests: -+ * write -+ * -+ * @param req request handle -+ * @param count the number of bytes written -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_write(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with data -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param buf buffer containing data -+ * @param size the size of data in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); -+ -+/** -+ * Reply with data copied/moved from buffer(s) -+ * -+ * Zero copy data transfer ("splicing") will be used under -+ * the following circumstances: -+ * -+ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -+ * 2. the kernel supports splicing from the fuse device -+ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -+ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE -+ * 4. The amount of data that is provided in file-descriptor backed -+ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -+ * is at least twice the page size. -+ * -+ * In order for SPLICE_F_MOVE to be used, the following additional -+ * conditions have to be fulfilled: -+ * -+ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -+ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -+ fuse_conn_info.capable), and -+ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -+ * -+ * Note that, if splice is used, the data is actually spliced twice: -+ * once into a temporary pipe (to prepend header data), and then again -+ * into the kernel. If some of the provided buffers are memory-backed, -+ * the data in them is copied in step one and spliced in step two. -+ * -+ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -+ * are silently ignored. -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * Side effects: -+ * when used to return data from a readdirplus() (but not readdir()) -+ * call, increments the lookup count of each returned entry by one -+ * on success. 
-+ * -+ * @param req request handle -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+ -+/** -+ * Reply with data vector -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); -+ -+/** -+ * Reply with filesystem statistics -+ * -+ * Possible requests: -+ * statfs -+ * -+ * @param req request handle -+ * @param stbuf filesystem statistics -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); -+ -+/** -+ * Reply with needed buffer size -+ * -+ * Possible requests: -+ * getxattr, listxattr -+ * -+ * @param req request handle -+ * @param count the buffer size needed in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_xattr(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with file lock information -+ * -+ * Possible requests: -+ * getlk -+ * -+ * @param req request handle -+ * @param lock the lock information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock); -+ -+/** -+ * Reply with block index -+ * -+ * Possible requests: -+ * bmap -+ * -+ * @param req request handle -+ * @param idx block index within device -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx); -+ -+/* ----------------------------------------------------------- * -+ * Filling a buffer in readdir * -+ * 
----------------------------------------------------------- */ -+ -+/** -+ * Add a directory entry to the buffer -+ * -+ * Buffer needs to be large enough to hold the entry. If it's not, -+ * then the entry is not filled in but the size of the entry is still -+ * returned. The caller can check this by comparing the bufsize -+ * parameter with the returned entry size. If the entry size is -+ * larger than the buffer size, the operation failed. -+ * -+ * From the 'stbuf' argument the st_ino field and bits 12-15 of the -+ * st_mode field are used. The other fields are ignored. -+ * -+ * *off* should be any non-zero value that the filesystem can use to -+ * identify the current point in the directory stream. It does not -+ * need to be the actual physical position. A value of zero is -+ * reserved to mean "from the beginning", and should therefore never -+ * be used (the first call to fuse_add_direntry should be passed the -+ * offset of the second directory entry). -+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param stbuf the file attributes -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, -+ off_t off); -+ -+/** -+ * Add a directory entry to the buffer with the attributes -+ * -+ * See documentation of `fuse_add_direntry()` for more details. 
-+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param e the directory entry -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); -+ -+/** -+ * Reply to ask for data fetch and output buffer preparation. ioctl -+ * will be retried with the specified input data fetched and output -+ * buffer prepared. -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param in_iov iovec specifying data to fetch from the caller -+ * @param in_count number of entries in in_iov -+ * @param out_iov iovec specifying addresses to write output to -+ * @param out_count number of entries in out_iov -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count); -+ -+/** -+ * Reply to finish ioctl -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param buf buffer containing output data -+ * @param size length of output data -+ */ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); -+ -+/** -+ * Reply to finish ioctl with iov buffer -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ */ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count); -+ -+/** -+ * Reply with poll result event mask -+ * -+ * @param req request handle -+ * @param revents poll result event mask -+ 
*/ -+int fuse_reply_poll(fuse_req_t req, unsigned revents); -+ -+/** -+ * Reply with offset -+ * -+ * Possible requests: -+ * lseek -+ * -+ * @param req request handle -+ * @param off offset of next data or hole -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lseek(fuse_req_t req, off_t off); -+ -+/* ----------------------------------------------------------- * -+ * Notification * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Notify IO readiness event -+ * -+ * For more information, please read comment for poll operation. -+ * -+ * @param ph poll handle to notify IO readiness event for -+ */ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Notify to invalidate cache for an inode. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * If the filesystem has writeback caching enabled, invalidating an -+ * inode will first trigger a writeback of all dirty pages. The call -+ * will block until all writeback requests have completed and the -+ * inode has been invalidated. It will, however, not wait for -+ * completion of pending writeback requests that have been issued -+ * before. -+ * -+ * If there are no dirty pages, this function will never block. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param off the offset in the inode where to start invalidating -+ * or negative to invalidate attributes only -+ * @param len the amount of cache to invalidate or 0 for all -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len); -+ -+/** -+ * Notify to invalidate parent attributes and the dentry matching -+ * parent/name -+ * -+ * To avoid a deadlock this function must not be called in the -+ * execution path of a related filesytem operation or within any code -+ * that could hold a lock that could be needed to execute such an -+ * operation. As of kernel 4.18, a "related operation" is a lookup(), -+ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() -+ * request for the parent, and a setattr(), unlink(), rmdir(), -+ * rename(), setxattr(), removexattr(), readdir() or readdirplus() -+ * request for the inode itself. -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen); -+ -+/** -+ * This function behaves like fuse_lowlevel_notify_inval_entry() with -+ * the following additional effect (at least as of Linux kernel 4.8): -+ * -+ * If the provided *child* inode matches the inode that is currently -+ * associated with the cached dentry, and if there are any inotify -+ * watches registered for the dentry, then the watchers are informed -+ * that the dentry has been deleted. 
-+ * -+ * To avoid a deadlock this function must not be called while -+ * executing a related filesytem operation or while holding a lock -+ * that could be needed to execute such an operation (see the -+ * description of fuse_lowlevel_notify_inval_entry() for more -+ * details). -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.18. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param child inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen); -+ -+/** -+ * Store data to the kernel buffers -+ * -+ * Synchronously store data in the kernel buffers belonging to the -+ * given inode. The stored data is marked up-to-date (no read will be -+ * performed against it, unless it's invalidated or evicted from the -+ * cache). -+ * -+ * If the stored data overflows the current file size, then the size -+ * is extended, similarly to a write(2) on the filesystem. -+ * -+ * If this function returns an error, then the store wasn't fully -+ * completed, but it may have been partially completed. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param offset the starting offset into the file to store to -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+/** -+ * Retrieve data from the kernel buffers -+ * -+ * Retrieve data in the kernel buffers belonging to the given inode. -+ * If successful then the retrieve_reply() method will be called with -+ * the returned data. -+ * -+ * Only present pages are returned in the retrieve reply. Retrieving -+ * stops when it finds a non-present page and only data prior to that -+ * is returned. -+ * -+ * If this function returns an error, then the retrieve will not be -+ * completed and no reply will be sent. -+ * -+ * This function doesn't change the dirty state of pages in the kernel -+ * buffer. For dirty pages the write() method will be called -+ * regardless of having been retrieved previously. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param size the number of bytes to retrieve -+ * @param offset the starting offset into the file to retrieve from -+ * @param cookie user data to supply to the reply callback -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie); -+ -+ -+/* ----------------------------------------------------------- * -+ * Utility functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Get the userdata from the request -+ * -+ * @param req request handle -+ * @return the user data passed to fuse_session_new() -+ */ -+void *fuse_req_userdata(fuse_req_t req); -+ -+/** -+ * Get the context from the request -+ * -+ * The pointer returned by this function will only be valid for the -+ * request's lifetime -+ * -+ * @param req request handle -+ * @return the context structure -+ */ -+const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); -+ -+/** -+ * Get the current supplementary group IDs for the specified request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param req request handle -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -+ -+/** -+ * Callback function for an interrupt -+ * -+ * @param req interrupted request -+ * @param data user data -+ */ -+typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); -+ -+/** -+ * Register/unregister callback for an interrupt -+ * -+ * If an interrupt has already happened, then the callback function is -+ * called from within this function, hence it's not possible for -+ * interrupts to be lost. -+ * -+ * @param req request handle -+ * @param func the callback function or NULL for unregister -+ * @param data user data passed to the callback function -+ */ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data); -+ -+/** -+ * Check if a request has already been interrupted -+ * -+ * @param req request handle -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_req_interrupted(fuse_req_t req); -+ -+ -+/* ----------------------------------------------------------- * -+ * Inquiry functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print low-level version information to stdout. -+ */ -+void fuse_lowlevel_version(void); -+ -+/** -+ * Print available low-level options to stdout. This is not an -+ * exhaustive list, but includes only those options that may be of -+ * interest to an end-user of a file system. -+ */ -+void fuse_lowlevel_help(void); -+ -+/** -+ * Print available options for `fuse_parse_cmdline()`. 
-+ */ -+void fuse_cmdline_help(void); -+ -+/* ----------------------------------------------------------- * -+ * Filesystem setup & teardown * -+ * ----------------------------------------------------------- */ -+ -+struct fuse_cmdline_opts { -+ int singlethread; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ int clone_fd; -+ unsigned int max_idle_threads; -+}; -+ -+/** -+ * Utility function to parse common options for simple file systems -+ * using the low-level API. A help text that describes the available -+ * options can be printed with `fuse_cmdline_help`. A single -+ * non-option argument is treated as the mountpoint. Multiple -+ * non-option arguments will result in an error. -+ * -+ * If neither -o subtype= or -o fsname= options are given, a new -+ * subtype option will be added and set to the basename of the program -+ * (the fsname will remain unset, and then defaults to "fuse"). -+ * -+ * Known options will be removed from *args*, unknown options will -+ * remain. -+ * -+ * @param args argument vector (input+output) -+ * @param opts output argument for parsed options -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_parse_cmdline(struct fuse_args *args, -+ struct fuse_cmdline_opts *opts); -+ -+/** -+ * Create a low level session. -+ * -+ * Returns a session structure suitable for passing to -+ * fuse_session_mount() and fuse_session_loop(). -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the general -+ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and -+ * -o default_permissions, but not ``-o use_ino``). Instead of `-o -+ * debug`, debugging may also enabled with `-d` or `--debug`. -+ * -+ * If not all options are known, an error message is written to stderr -+ * and the function returns NULL. 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. To prevent accidentally passing an option in -+ * argv[0], this element must always be present (even if no options -+ * are specified). It may be set to the empty string ('\0') if no -+ * reasonable value can be provided. -+ * -+ * @param args argument vector -+ * @param op the (low-level) filesystem operations -+ * @param op_size sizeof(struct fuse_lowlevel_ops) -+ * @param userdata user data -+ * -+ * @return the fuse session on success, NULL on failure -+ **/ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param se session object -+ * -+ * @return 0 on success, -1 on failure. -+ **/ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+ -+/** -+ * Enter a single threaded, blocking event loop. -+ * -+ * When the event loop terminates because the connection to the FUSE -+ * kernel module has been closed, this function returns zero. This -+ * happens when the filesystem is unmounted regularly (by the -+ * filesystem owner or root running the umount(8) or fusermount(1) -+ * command), or if connection is explicitly severed by writing ``1`` -+ * to the``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only -+ * way to distinguish between these two conditions is to check if the -+ * filesystem is still mounted after the session loop returns. -+ * -+ * When some error occurs during request processing, the function -+ * returns a negated errno(3) value. -+ * -+ * If the loop has been terminated because of a signal handler -+ * installed by fuse_set_signal_handlers(), this function returns the -+ * (positive) signal value that triggered the exit. 
-+ * -+ * @param se the session -+ * @return 0, -errno, or a signal value -+ */ -+int fuse_session_loop(struct fuse_session *se); -+ -+/** -+ * Enter a multi-threaded event loop. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param se the session -+ * @param config session loop configuration -+ * @return see fuse_session_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); -+#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) -+#else -+int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Flag a session as terminated. -+ * -+ * This function is invoked by the POSIX signal handlers, when -+ * registered using fuse_set_signal_handlers(). It will cause any -+ * running event loops to terminate on the next opportunity. -+ * -+ * @param se the session -+ */ -+void fuse_session_exit(struct fuse_session *se); -+ -+/** -+ * Reset the terminated flag of a session -+ * -+ * @param se the session -+ */ -+void fuse_session_reset(struct fuse_session *se); -+ -+/** -+ * Query the terminated flag of a session -+ * -+ * @param se the session -+ * @return 1 if exited, 0 if not exited -+ */ -+int fuse_session_exited(struct fuse_session *se); -+ -+/** -+ * Ensure that file system is unmounted. -+ * -+ * In regular operation, the file system is typically unmounted by the -+ * user calling umount(8) or fusermount(1), which then terminates the -+ * FUSE session loop. However, the session loop may also terminate as -+ * a result of an explicit call to fuse_session_exit() (e.g. by a -+ * signal handler installed by fuse_set_signal_handler()). 
In this -+ * case the filesystem remains mounted, but any attempt to access it -+ * will block (while the filesystem process is still running) or give -+ * an ESHUTDOWN error (after the filesystem process has terminated). -+ * -+ * If the communication channel with the FUSE kernel module is still -+ * open (i.e., if the session loop was terminated by an explicit call -+ * to fuse_session_exit()), this function will close it and unmount -+ * the filesystem. If the communication channel has been closed by the -+ * kernel, this method will do (almost) nothing. -+ * -+ * NOTE: The above semantics mean that if the connection to the kernel -+ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, -+ * this method will *not* unmount the filesystem. -+ * -+ * @param se the session -+ */ -+void fuse_session_unmount(struct fuse_session *se); -+ -+/** -+ * Destroy a session -+ * -+ * @param se the session -+ */ -+void fuse_session_destroy(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Custom event loop support * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Return file descriptor for communication with kernel. -+ * -+ * The file selector can be used to integrate FUSE with a custom event -+ * loop. Whenever data is available for reading on the provided fd, -+ * the event loop should call `fuse_session_receive_buf` followed by -+ * `fuse_session_process_buf` to process the request. -+ * -+ * The returned file descriptor is valid until `fuse_session_unmount` -+ * is called. -+ * -+ * @param se the session -+ * @return a file descriptor -+ */ -+int fuse_session_fd(struct fuse_session *se); -+ -+/** -+ * Process a raw request supplied in a generic buffer -+ * -+ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
-+ * -+ * @param se the session -+ * @param buf the fuse_buf containing the request -+ */ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf); -+ -+/** -+ * Read a raw request from the kernel into the supplied buffer. -+ * -+ * Depending on file system options, system capabilities, and request -+ * size the request is either read into a memory buffer or spliced -+ * into a temporary pipe. -+ * -+ * @param se the session -+ * @param buf the fuse_buf to store the request in -+ * @return the actual size of the raw request, or -errno on error -+ */ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -new file mode 100644 -index 0000000..2f6663e ---- /dev/null -+++ b/tools/virtiofsd/fuse_misc.h -@@ -0,0 +1,59 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include -+ -+/* -+ Versioned symbols cannot be used in some cases because it -+ - confuse the dynamic linker in uClibc -+ - not supported on MacOSX (in MachO binary format) -+*/ -+#if (!defined(__UCLIBC__) && !defined(__APPLE__)) -+#define FUSE_SYMVER(x) __asm__(x) -+#else -+#define FUSE_SYMVER(x) -+#endif -+ -+#ifndef USE_UCLIBC -+#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) -+#else -+/* Is this hack still needed? 
*/ -+static inline void fuse_mutex_init(pthread_mutex_t *mut) -+{ -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); -+} -+#endif -+ -+#ifdef HAVE_STRUCT_STAT_ST_ATIM -+/* Linux */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) -+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) -+/* FreeBSD */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) -+#else -+#define ST_ATIM_NSEC(stbuf) 0 -+#define ST_CTIM_NSEC(stbuf) 0 -+#define ST_MTIM_NSEC(stbuf) 0 -+#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) -+#endif -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -new file mode 100644 -index 0000000..d8573e7 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.h -@@ -0,0 +1,271 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#ifndef FUSE_OPT_H_ -+#define FUSE_OPT_H_ -+ -+/** @file -+ * -+ * This file defines the option parsing interface of FUSE -+ */ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Option description -+ * -+ * This structure describes a single option, and action associated -+ * with it, in case it matches. -+ * -+ * More than one such match may occur, in which case the action for -+ * each match is executed. -+ * -+ * There are three possible actions in case of a match: -+ * -+ * i) An integer (int or unsigned) variable determined by 'offset' is -+ * set to 'value' -+ * -+ * ii) The processing function is called, with 'value' as the key -+ * -+ * iii) An integer (any) or string (char *) variable determined by -+ * 'offset' is set to the value of an option parameter -+ * -+ * 'offset' should normally be either set to -+ * -+ * - 'offsetof(struct foo, member)' actions i) and iii) -+ * -+ * - -1 action ii) -+ * -+ * The 'offsetof()' macro is defined in the header. -+ * -+ * The template determines which options match, and also have an -+ * effect on the action. Normally the action is either i) or ii), but -+ * if a format is present in the template, then action iii) is -+ * performed. -+ * -+ * The types of templates are: -+ * -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * themselves. Invalid values are "--" and anything beginning -+ * with "-o" -+ * -+ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or -+ * the relevant option in a comma separated option list -+ * -+ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) -+ * which have a parameter -+ * -+ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform -+ * action iii). -+ * -+ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as -+ * two separate arguments -+ * -+ * 6) "-x %s", etc. Combination of 4) and 5) -+ * -+ * If the format is "%s", memory is allocated for the string unlike with -+ * scanf(). 
The previous value (if non-NULL) stored at the this location is -+ * freed. -+ */ -+struct fuse_opt { -+ /** Matching template and optional parameter formatting */ -+ const char *templ; -+ -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; -+ -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; -+}; -+ -+/** -+ * Key option. In case of a match, the processing function will be -+ * called with the specified key. -+ */ -+#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+ -+/** -+ * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * template value -+ */ -+#define FUSE_OPT_END { NULL, 0, 0 } -+ -+/** -+ * Argument list -+ */ -+struct fuse_args { -+ /** Argument count */ -+ int argc; -+ -+ /** Argument vector. NULL terminated */ -+ char **argv; -+ -+ /** Is 'argv' allocated? */ -+ int allocated; -+}; -+ -+/** -+ * Initializer for 'struct fuse_args' -+ */ -+#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+ -+/** -+ * Key value passed to the processing function if an option did not -+ * match any template -+ */ -+#define FUSE_OPT_KEY_OPT -1 -+ -+/** -+ * Key value passed to the processing function for all non-options -+ * -+ * Non-options are the arguments beginning with a character other than -+ * '-' or all arguments after the special '--' option -+ */ -+#define FUSE_OPT_KEY_NONOPT -2 -+ -+/** -+ * Special key value for options to keep -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned 1 -+ */ -+#define FUSE_OPT_KEY_KEEP -3 -+ -+/** -+ * Special key value for options to discard -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned zero -+ */ -+#define FUSE_OPT_KEY_DISCARD -4 -+ -+/** -+ * Processing function -+ * -+ * This function is called if -+ * - option did 
not match any 'struct fuse_opt' -+ * - argument is a non-option -+ * - option did match and offset was set to -1 -+ * -+ * The 'arg' parameter will always contain the whole argument or -+ * option including the parameter if exists. A two-argument option -+ * ("-x foo") is always converted to single argument option of the -+ * form "-xfoo" before this function is called. -+ * -+ * Options of the form '-ofoo' are passed to this function without the -+ * '-o' prefix. -+ * -+ * The return value of this function determines whether this argument -+ * is to be inserted into the output argument vector, or discarded. -+ * -+ * @param data is the user data passed to the fuse_opt_parse() function -+ * @param arg is the whole argument or option -+ * @param key determines why the processing function was called -+ * @param outargs the current output argument list -+ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept -+ */ -+typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -+ struct fuse_args *outargs); -+ -+/** -+ * Option parsing function -+ * -+ * If 'args' was returned from a previous call to fuse_opt_parse() or -+ * it was constructed from -+ * -+ * A NULL 'args' is equivalent to an empty argument vector -+ * -+ * A NULL 'opts' is equivalent to an 'opts' array containing a single -+ * end marker -+ * -+ * A NULL 'proc' is equivalent to a processing function always -+ * returning '1' -+ * -+ * @param args is the input and output argument list -+ * @param data is the user data -+ * @param opts is the option description array -+ * @param proc is the processing function -+ * @return -1 on error, 0 on success -+ */ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ -+/** -+ * Add an option to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation 
error, 0 on success -+ */ -+int fuse_opt_add_opt(char **opts, const char *opt); -+ -+/** -+ * Add an option, escaping commas, to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt); -+ -+/** -+ * Add an argument to a NULL terminated argument vector -+ * -+ * @param args is the structure containing the current argument list -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg); -+ -+/** -+ * Add an argument at the specified position in a NULL terminated -+ * argument vector -+ * -+ * Adds the argument to the N-th position. This is useful for adding -+ * options at the beginning of the array which must not come after the -+ * special '--' option. -+ * -+ * @param args is the structure containing the current argument list -+ * @param pos is the position at which to add the argument -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); -+ -+/** -+ * Free the contents of argument list -+ * -+ * The structure itself is not freed -+ * -+ * @param args is the structure containing the argument list -+ */ -+void fuse_opt_free_args(struct fuse_args *args); -+ -+ -+/** -+ * Check if an option matches -+ * -+ * @param opts is the option description array -+ * @param opt is the option to match -+ * @return 1 if a match is found, 0 if not -+ */ -+int fuse_opt_match(const struct fuse_opt opts[], const char *opt); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -new file mode 100644 -index 0000000..6b77c33 ---- /dev/null -+++ 
b/tools/virtiofsd/passthrough_helpers.h -@@ -0,0 +1,76 @@ -+/* -+ * FUSE: Filesystem in Userspace -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE -+ */ -+ -+/* -+ * Creates files on the underlying file system in response to a FUSE_MKNOD -+ * operation -+ */ -+static int mknod_wrapper(int dirfd, const char *path, const char *link, -+ int mode, dev_t rdev) -+{ -+ int res; -+ -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) -+ res = close(res); -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if (S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); 
-+#ifdef __FreeBSD__ -+ } else if (S_ISSOCK(mode)) { -+ struct sockaddr_un su; -+ int fd; -+ -+ if (strlen(path) >= sizeof(su.sun_path)) { -+ errno = ENAMETOOLONG; -+ return -1; -+ } -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (fd >= 0) { -+ /* -+ * We must bind the socket to the underlying file -+ * system to create the socket file, even though -+ * we'll never listen on this socket. -+ */ -+ su.sun_family = AF_UNIX; -+ strncpy(su.sun_path, path, sizeof(su.sun_path)); -+ res = bindat(dirfd, fd, (struct sockaddr*)&su, -+ sizeof(su)); -+ if (res == 0) -+ close(fd); -+ } else { -+ res = -1; -+ } -+#endif -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } -+ -+ return res; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/kvm-virtiofsd-Remove-fuse_req_getgroups.patch deleted file mode 100644 index 27e71f2..0000000 --- a/kvm-virtiofsd-Remove-fuse_req_getgroups.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:08 +0000 -Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-2-dgilbert@redhat.com> -Patchwork-id: 94122 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Remove fuse_req_getgroups that's unused in virtiofsd; it came in -from libfuse but we don't actually use it. It was called from -fuse_getgroups which we previously removed (but had left it's header -in). - -Coverity had complained about null termination in it, but removing -it is the easiest answer. - -Fixes: Coverity CID: 1413117 (String not null terminated) -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse.h | 20 ----------- - tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 21 ----------- - 3 files changed, 118 deletions(-) - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 7a4c713..aba13fe 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); - struct fuse_context *fuse_get_context(void); - - /** -- * Get the current supplementary group IDs for the current request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. 
-- * -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_getgroups(int size, gid_t list[]); -- --/** - * Check if the current request has already been interrupted - * - * @return 1 if the request has been interrupted, 0 otherwise -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index de2e2e0..01c418a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) - return !!se->virtio_dev; - } - --#ifdef linux --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -- -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -- --retry: -- buf = malloc(bufsize); -- if (buf == NULL) { -- return -ENOMEM; -- } -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) { -- goto out_free; -- } -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if ((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) { -- goto out_free; -- } -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) { -- break; -- } -- -- s = end; -- if (ret < size) { -- list[ret] = val; -- } -- ret++; -- } -- --out_free: -- free(buf); -- return ret; --} --#else /* linux */ --/* -- * This is currently not implemented on other than Linux... 
-- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- (void)req; -- (void)size; -- (void)list; -- return -ENOSYS; --} --#endif -- - void fuse_session_exit(struct fuse_session *se) - { - se->exited = 1; -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 138041e..8f6d705 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); - - /** -- * Get the current supplementary group IDs for the specified request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. -- * -- * @param req request handle -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -- --/** - * Callback function for an interrupt - * - * @param req interrupted request --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch deleted file mode 100644 index 7f9c5bb..0000000 --- a/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +++ /dev/null @@ -1,271 +0,0 @@ -From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:48 +0100 -Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-14-dgilbert@redhat.com> -Patchwork-id: 93465 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -Signed-off-by: Xiao Yang -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 7 +++--- - tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- - tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 5 files changed, 13 insertions(+), 92 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5df946c..4d507f3 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ size_t len) - { - int src_is_fd = src->flags & FUSE_BUF_IS_FD; - int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - return 1; - } - --ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { - size_t copied = 0; - -@@ 
-249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, - dst_len = dst->size - dstv->off; - len = min_size(src_len, dst_len); - -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); - if (res < 0) { - if (!copied) { - return res; -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bd9bf86..0cb33ac 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -605,48 +605,6 @@ enum fuse_buf_flags { - }; - - /** -- * Buffer copy flags -- */ --enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), --}; -- --/** - * Single data buffer - * - * Generic data buffer for I/O, extended attributes, etc... 
Data may -@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * - * @param dst destination buffer vector - * @param src source buffer vector -- * @param flags flags controlling the copy - * @return actual number of bytes copied or -errno on error - */ --ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - - /* - * Signal handling -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index eb0ec49..3da80de 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct fuse_bufvec *buf) - { - size_t len = fuse_buf_size(buf); -- (void)flags; - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; - struct fuse_out_header out; -@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - out.unique = req->unique; - out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); - return res; -@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv) - { - struct fuse_out_header out; - struct fuse_notify_store_out outarg; 
-@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); - if (res > 0) { - res = -res; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 12a84b4..2fa225d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - /** - * Reply with data copied/moved from buffer(s) - * -- * Zero copy data transfer ("splicing") will be used under -- * the following circumstances: -- * -- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -- * 2. the kernel supports splicing from the fuse device -- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE -- * 4. The amount of data that is provided in file-descriptor backed -- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -- * is at least twice the page size. -- * -- * In order for SPLICE_F_MOVE to be used, the following additional -- * conditions have to be fulfilled: -- * -- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -- fuse_conn_info.capable), and -- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -- * -- * Note that, if splice is used, the data is actually spliced twice: -- * once into a temporary pipe (to prepend header data), and then again -- * into the kernel. If some of the provided buffers are memory-backed, -- * the data in them is copied in step one and spliced in step two. -- * -- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -- * are silently ignored. 
-- * - * Possible requests: - * read, readdir, getxattr, listxattr - * -@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * - * @param req request handle - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); - - /** - * Reply with data vector -@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - * @param ino the inode number - * @param offset the starting offset into the file to store to - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv); - - /* - * Utility functions -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9377718..126a56c 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - buf.buf[0].fd = fi->fh; - buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -- res = fuse_buf_copy(&out_buf, in_buf, 0); -+ res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch deleted file mode 100644 index e1a3cd1..0000000 --- a/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +++ /dev/null @@ -1,72 +0,0 @@ -From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:14 +0100 -Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-100-dgilbert@redhat.com> -Patchwork-id: 93553 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If an application wants to do direct IO and opens a file with O_DIRECT -in guest, that does not necessarily mean that we need to bypass page -cache on host as well. So reset this flag on host. - -If somebody needs to bypass page cache on host as well (and it is safe to -do so), we can add a knob in daemon later to control this behavior. - -I check virtio-9p and they do reset O_DIRECT flag. - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ccbbec1..948cb19 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. 
If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; -@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= ~O_APPEND; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Send-replies-to-messages.patch b/kvm-virtiofsd-Send-replies-to-messages.patch deleted file mode 100644 index 5453fda..0000000 --- a/kvm-virtiofsd-Send-replies-to-messages.patch +++ /dev/null @@ -1,199 +0,0 @@ -From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:03 +0100 -Subject: [PATCH 032/116] virtiofsd: Send replies to messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-29-dgilbert@redhat.com> -Patchwork-id: 93485 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Route fuse out messages back through the same queue elements -that had the command that triggered the request. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++ - tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- - tools/virtiofsd/fuse_virtio.h | 4 ++ - 3 files changed, 111 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index af09fa2..380d93b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - } - -+ if (fuse_lowlevel_is_virtio(se)) { -+ return virtio_send_msg(se, ch, iov, count); -+ } -+ - abort(); /* virtio should have taken it before here */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 3841b20..05d0e29 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,9 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ -+ /* The element for the command currently being processed */ -+ VuVirtqElement *qe; - }; - - /* -@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, - } - } - -+/* -+ * Copy from one iov to another, the given number of bytes -+ * The caller must have checked sizes. -+ */ -+static void copy_iov(struct iovec *src_iov, int src_count, -+ struct iovec *dst_iov, int dst_count, size_t to_copy) -+{ -+ size_t dst_offset = 0; -+ /* Outer loop copies 'src' elements */ -+ while (to_copy) { -+ assert(src_count); -+ size_t src_len = src_iov[0].iov_len; -+ size_t src_offset = 0; -+ -+ if (src_len > to_copy) { -+ src_len = to_copy; -+ } -+ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ -+ while (src_len) { -+ assert(dst_count); -+ size_t dst_len = dst_iov[0].iov_len - dst_offset; -+ if (dst_len > src_len) { -+ dst_len = src_len; -+ } -+ -+ memcpy(dst_iov[0].iov_base + dst_offset, -+ src_iov[0].iov_base + src_offset, dst_len); -+ src_len -= dst_len; -+ to_copy -= dst_len; -+ src_offset += dst_len; -+ dst_offset += dst_len; -+ -+ assert(dst_offset <= dst_iov[0].iov_len); -+ if (dst_offset == dst_iov[0].iov_len) { -+ dst_offset = 0; -+ dst_iov++; -+ dst_count--; -+ } -+ } -+ src_iov++; -+ src_count--; -+ } -+} -+ -+/* -+ * Called back by ll whenever it wants to send a reply/message back -+ * The 1st element of the iov starts with the fuse_out_header -+ * 'unique'==0 means it's a notify message. -+ */ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! */ -+ -+ size_t tosend_len = iov_size(iov, count); -+ -+ /* unique == 0 is notification, which we don't support */ -+ assert(out->unique); -+ /* For virtio we always have ch */ -+ assert(ch); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ return -E2BIG; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ return -E2BIG; -+ } -+ -+ copy_iov(iov, count, in_sg, in_num, tosend_len); -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+ return 0; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) - - /* TODO! Endianness of header */ - -- /* TODO: Fixup fuse_send_msg */ - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -- vu_queue_notify(dev, q); -- -+ qi->qe = NULL; - free(elem); - elem = NULL; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 23026d6..135a148 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); - - int virtio_loop(struct fuse_session *se); - -+ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-queue-threads.patch b/kvm-virtiofsd-Start-queue-threads.patch deleted file mode 100644 index 8b03cd6..0000000 --- a/kvm-virtiofsd-Start-queue-threads.patch +++ /dev/null @@ -1,165 +0,0 @@ -From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:00 +0100 -Subject: [PATCH 029/116] virtiofsd: Start queue threads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-26-dgilbert@redhat.com> -Patchwork-id: 93479 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Start a thread for each queue when we get notified it's been started. - -Signed-off-by: Dr. David Alan Gilbert -fix by: -Signed-off-by: Jun Piao -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 4819e56..2a94bb3 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,6 +11,7 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -30,6 +31,15 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_QueueInfo { -+ pthread_t thread; -+ struct fv_VuDev *virtio_dev; -+ -+ /* Our queue index, corresponds to array position */ -+ int qidx; -+ int kick_fd; -+}; -+ - /* - * We pass the dev element into libvhost-user - * and then use it to get back to the outer -@@ -38,6 +48,13 @@ - struct fv_VuDev { - VuDev dev; - struct fuse_session *se; -+ -+ /* -+ * The following pair of fields are only accessed in the main -+ * virtio_loop -+ */ -+ size_t nqueues; -+ struct fv_QueueInfo **qi; - }; - - /* From spec */ -@@ -83,6 +100,75 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+static void *fv_queue_thread(void *opaque) -+{ -+ struct fv_QueueInfo *qi = opaque; -+ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d 
kick_fd %d\n", __func__, -+ qi->qidx, qi->kick_fd); -+ while (1) { -+ /* TODO */ -+ } -+ -+ return NULL; -+} -+ -+/* Callback from libvhost-user on start or stop of a queue */ -+static void fv_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); -+ struct fv_QueueInfo *ourqi; -+ -+ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, -+ started); -+ assert(qidx >= 0); -+ -+ /* -+ * Ignore additional request queues for now. passthrough_ll.c must be -+ * audited for thread-safety issues first. It was written with a -+ * well-behaved client in mind and may not protect against all types of -+ * races yet. -+ */ -+ if (qidx > 1) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: multiple request queues not yet implemented, please only " -+ "configure 1 request queue\n", -+ __func__); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (started) { -+ /* Fire up a thread to watch this queue */ -+ if (qidx >= vud->nqueues) { -+ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); -+ assert(vud->qi); -+ memset(vud->qi + vud->nqueues, 0, -+ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); -+ vud->nqueues = qidx + 1; -+ } -+ if (!vud->qi[qidx]) { -+ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); -+ assert(vud->qi[qidx]); -+ vud->qi[qidx]->virtio_dev = vud; -+ vud->qi[qidx]->qidx = qidx; -+ } else { -+ /* Shouldn't have been started */ -+ assert(vud->qi[qidx]->kick_fd == -1); -+ } -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", -+ __func__, qidx); -+ assert(0); -+ } -+ } else { -+ /* TODO: Kill the thread */ -+ assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = -1; -+ } -+} -+ - static bool fv_queue_order(VuDev *dev, int qidx) - { - return false; -@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { - .get_features 
= fv_get_features, - .set_features = fv_set_features, - -+ /* Don't need process message, we've not got any at vhost-user level */ -+ .queue_set_started = fv_queue_set_started, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/kvm-virtiofsd-Start-reading-commands-from-queue.patch deleted file mode 100644 index 2022480..0000000 --- a/kvm-virtiofsd-Start-reading-commands-from-queue.patch +++ /dev/null @@ -1,200 +0,0 @@ -From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:02 +0100 -Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-28-dgilbert@redhat.com> -Patchwork-id: 93484 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pop queue elements off queues, copy the data from them and -pass that to fuse. - - Note: 'out' in a VuVirtqElement is from QEMU - 'in' in libfuse is into the daemon - - So we read from the out iov's to get a fuse_in_header - -When we get a kick we've got to read all the elements until the queue -is empty. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 + - tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 98 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index ec04449..1126723 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - - struct fv_VuDev; -+struct fv_QueueInfo; - - struct fuse_req { - struct fuse_session *se; -@@ -75,6 +76,7 @@ struct fuse_chan { - pthread_mutex_t lock; - int ctr; - int fd; -+ struct fv_QueueInfo *qi; - }; - - /** -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05e7258..3841b20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -12,6 +12,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/iov.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -32,6 +33,7 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; - struct fv_VuDev *virtio_dev; -@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* -+ * Copy from an iovec into a fuse_buf (memory only) -+ * Caller must ensure there is space -+ */ -+static void copy_from_iov(struct fuse_buf *buf, size_t out_num, -+ const struct iovec *out_sg) -+{ -+ void *dest = buf->mem; -+ -+ while (out_num) { -+ size_t onelen = out_sg->iov_len; -+ memcpy(dest, out_sg->iov_base, onelen); -+ dest += onelen; -+ out_sg++; -+ out_num--; -+ } -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ 
struct fuse_session *se = qi->virtio_dev->se; -+ struct fuse_chan ch; -+ struct fuse_buf fbuf; -+ -+ fbuf.mem = NULL; -+ fbuf.flags = 0; -+ -+ fuse_mutex_init(&ch.lock); -+ ch.fd = (int)0xdaff0d111; -+ ch.qi = qi; -+ - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -- if (qi->virtio_dev->se->debug) { -- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -- qi->qidx, (size_t)evalue); -+ /* out is from guest, in is too guest */ -+ unsigned int in_bytes, out_bytes; -+ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", -+ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); -+ -+ while (1) { -+ /* -+ * An element contains one request and the space to send our -+ * response They're spread over multiple descriptors in a -+ * scatter/gather set and we can't trust the guest to keep them -+ * still; so copy in/out. -+ */ -+ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ -+ if (!fbuf.mem) { -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ } -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", __func__, -+ elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ copy_from_iov(&fbuf, out_num, out_sg); -+ fbuf.size = out_len; -+ -+ /* TODO! Endianness of header */ -+ -+ /* TODO: Fixup fuse_send_msg */ -+ /* TODO: Add checks for fuse_session_exited */ -+ fuse_session_process_buf_int(se, &fbuf, &ch); -+ -+ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -+ vu_queue_notify(dev, q); -+ -+ free(elem); -+ elem = NULL; - } - } -+ pthread_mutex_destroy(&ch.lock); -+ free(fbuf.mem); - - return NULL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/kvm-virtiofsd-Start-wiring-up-vhost-user.patch deleted file mode 100644 index 7b50118..0000000 --- a/kvm-virtiofsd-Start-wiring-up-vhost-user.patch +++ /dev/null @@ -1,247 +0,0 @@ -From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:57 +0100 -Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-23-dgilbert@redhat.com> -Patchwork-id: 93477 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Listen on our unix socket for the connection from QEMU, when we get it -initialise vhost-user and dive into our own loop variant (currently -dummy). - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 4 ++ - tools/virtiofsd/fuse_lowlevel.c | 5 +++ - tools/virtiofsd/fuse_lowlevel.h | 7 ++++ - tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- - tools/virtiofsd/fuse_virtio.h | 2 + - tools/virtiofsd/passthrough_ll.c | 7 +--- - 6 files changed, 106 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 82d6ac7..ec04449 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -13,6 +13,8 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - -+struct fv_VuDev; -+ - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -65,6 +67,8 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_socketfd; -+ struct fv_VuDev *virtio_dev; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5df124e..af09fa2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) - { - } - -+int fuse_lowlevel_is_virtio(struct fuse_session *se) -+{ -+ return se->vu_socket_path != NULL; -+} -+ - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 2fa225d..f6b3470 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - */ - int fuse_req_interrupted(fuse_req_t req); - -+/** -+ * Check if the session is connected via virtio -+ * -+ * @param se session object -+ * @return 1 if the session is a virtio session -+ */ -+int fuse_lowlevel_is_virtio(struct fuse_session *se); - - /* - * Inquiry functions -diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index cbef6ff..2ae3c76 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -19,18 +19,78 @@ - - #include - #include -+#include - #include - #include - #include - #include - #include - -+#include "contrib/libvhost-user/libvhost-user.h" -+ -+/* -+ * We pass the dev element into libvhost-user -+ * and then use it to get back to the outer -+ * container for other data. -+ */ -+struct fv_VuDev { -+ VuDev dev; -+ struct fuse_session *se; -+}; -+ - /* From spec */ - struct virtio_fs_config { - char tag[36]; - uint32_t num_queues; - }; - -+/* -+ * Callback from libvhost-user if there's a new fd we're supposed to listen -+ * to, typically a queue kick? -+ */ -+static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, -+ void *data) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* -+ * Callback from libvhost-user if we're no longer supposed to listen on an fd -+ */ -+static void fv_remove_watch(VuDev *dev, int fd) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* Callback from libvhost-user to panic */ -+static void fv_panic(VuDev *dev, const char *err) -+{ -+ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); -+ /* TODO: Allow reconnects?? 
*/ -+ exit(EXIT_FAILURE); -+} -+ -+static bool fv_queue_order(VuDev *dev, int qidx) -+{ -+ return false; -+} -+ -+static const VuDevIface fv_iface = { -+ /* TODO: Add other callbacks */ -+ .queue_is_processed_in_order = fv_queue_order, -+}; -+ -+int virtio_loop(struct fuse_session *se) -+{ -+ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); -+ -+ while (1) { -+ /* TODO: Add stuffing */ -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+} -+ - int virtio_session_mount(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -- return -1; -+ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", -+ __func__); -+ int data_sock = accept(listen_sock, NULL, NULL); -+ if (data_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -+ close(listen_sock); -+ return -1; -+ } -+ close(listen_sock); -+ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", -+ __func__); -+ -+ /* TODO: Some cleanup/deallocation! 
*/ -+ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); -+ if (!se->virtio_dev) { -+ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); -+ close(data_sock); -+ return -1; -+ } -+ -+ se->vu_socketfd = data_sock; -+ se->virtio_dev->se = se; -+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, -+ fv_remove_watch, &fv_iface); -+ -+ return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 8f2edb6..23026d6 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -20,4 +20,6 @@ struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); - -+int virtio_loop(struct fuse_session *se); -+ - #endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc9b264..037c5d7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) - fuse_daemonize(opts.foreground); - - /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) { -- ret = fuse_session_loop(se); -- } else { -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- } -+ ret = virtio_loop(se); - - fuse_session_unmount(se); - err_out3: --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Support-remote-posix-locks.patch b/kvm-virtiofsd-Support-remote-posix-locks.patch deleted file mode 100644 index e60364a..0000000 --- a/kvm-virtiofsd-Support-remote-posix-locks.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:03 +0100 -Subject: [PATCH 092/116] virtiofsd: Support remote posix locks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-89-dgilbert@redhat.com> -Patchwork-id: 93537 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Doing posix locks with-in guest kernel are not sufficient if a file/dir -is being shared by multiple guests. So we need the notion of daemon doing -the locks which are visible to rest of the guests. - -Given posix locks are per process, one can not call posix lock API on host, -otherwise bunch of basic posix locks properties are broken. For example, -If two processes (A and B) in guest open the file and take locks on different -sections of file, if one of the processes closes the fd, it will close -fd on virtiofsd and all posix locks on file will go away. This means if -process A closes the fd, then locks of process B will go away too. - -Similar other problems exist too. - -This patch set tries to emulate posix locks while using open file -description locks provided on Linux. - -Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable -or disable posix locking in daemon. By default it is enabled. - -There are few issues though. - -- GETLK() returns pid of process holding lock. As we are emulating locks - using OFD, and these locks are not per process and don't return pid - of process, so GETLK() in guest does not reuturn process pid. - -- As of now only F_SETLK is supported and not F_SETLKW. We can't block - the thread in virtiofsd for arbitrary long duration as there is only - one thread serving the queue. That means unlock request will not make - it to daemon and F_SETLKW will block infinitely and bring virtio-fs - to a halt. This is a solvable problem though and will require significant - changes in virtiofsd and kernel. Left as a TODO item for now. 
- -Signed-off-by: Vivek Goyal -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 3 + - tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 192 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5672024..33749bf 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o posix_lock|no_posix_lock\n" -+ " enable/disable remote posix lock\n" -+ " default: posix_lock\n" - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 05b5f89..9414935 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -67,6 +67,12 @@ - #include "passthrough_helpers.h" - #include "seccomp.h" - -+/* Keep track of inode posix locks for each owner. 
*/ -+struct lo_inode_plock { -+ uint64_t lock_owner; -+ int fd; /* fd for OFD locks */ -+}; -+ - struct lo_map_elem { - union { - struct lo_inode *inode; -@@ -95,6 +101,8 @@ struct lo_inode { - struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; -+ pthread_mutex_t plock_mutex; -+ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - }; - - struct lo_cred { -@@ -114,6 +122,7 @@ struct lo_data { - int norace; - int writeback; - int flock; -+ int posix_lock; - int xattr; - char *source; - double timeout; -@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { - { "source=%s", offsetof(struct lo_data, source), 0 }, - { "flock", offsetof(struct lo_data, flock), 1 }, - { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, -+ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, - { "xattr", offsetof(struct lo_data, xattr), 1 }, - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ -+ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { -+ if (lo->posix_lock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); -+ conn->want |= FUSE_CAP_POSIX_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); -+ conn->want &= ~FUSE_CAP_POSIX_LOCKS; -+ } -+ } -+ - if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - return p; - } - -+/* value_destroy_func for posix_locks GHashTable */ -+static void posix_locks_value_destroy(gpointer data) -+{ -+ struct lo_inode_plock *plock = data; -+ -+ 
/* -+ * We had used open() for locks and had only one fd. So -+ * closing this fd should release all OFD locks. -+ */ -+ close(plock->fd); -+ free(plock); -+} -+ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - newfd = -1; - inode->key.ino = e->attr.st_ino; - inode->key.dev = e->attr.st_dev; -+ pthread_mutex_init(&inode->plock_mutex, NULL); -+ inode->posix_locks = g_hash_table_new_full( -+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - if (!inode->refcount) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); -+ if (g_hash_table_size(inode->posix_locks)) { -+ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); -+ } -+ g_hash_table_destroy(inode->posix_locks); -+ pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1516,6 +1559,136 @@ out: - } - } - -+/* Should be called with inode->plock_mutex held */ -+static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, -+ struct lo_inode *inode, -+ uint64_t lock_owner, -+ pid_t pid, int *err) -+{ -+ struct lo_inode_plock *plock; -+ char procname[64]; -+ int fd; -+ -+ plock = -+ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); -+ -+ if (plock) { -+ return plock; -+ } -+ -+ plock = malloc(sizeof(struct lo_inode_plock)); -+ if (!plock) { -+ *err = ENOMEM; -+ return NULL; -+ } -+ -+ /* Open another instance of file which can be used for ofd locks. */ -+ sprintf(procname, "%i", inode->fd); -+ -+ /* TODO: What if file is not writable? 
*/ -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd == -1) { -+ *err = errno; -+ free(plock); -+ return NULL; -+ } -+ -+ plock->lock_owner = lock_owner; -+ plock->fd = fd; -+ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), -+ plock); -+ return plock; -+} -+ -+static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getlk(ino=%" PRIu64 ", flags=%d)" -+ " owner=0x%lx, l_type=%d l_start=0x%lx" -+ " l_len=0x%lx\n", -+ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, -+ lock->l_len); -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ ret = fcntl(plock->fd, F_OFD_GETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ -+ if (saverr) { -+ fuse_reply_err(req, saverr); -+ } else { -+ fuse_reply_lock(req, lock); -+ } -+} -+ -+static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setlk(ino=%" PRIu64 ", flags=%d)" -+ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" -+ " l_start=0x%lx l_len=0x%lx\n", -+ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, -+ lock->l_whence, lock->l_start, lock->l_len); -+ -+ if (sleep) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, 
EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ /* TODO: Is it alright to modify flock? */ -+ lock->l_pid = 0; -+ ret = fcntl(plock->fd, F_OFD_SETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, saverr); -+} -+ - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { -@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ /* An fd is going away. Cleanup associated posix locks */ -+ pthread_mutex_lock(&inode->plock_mutex); -+ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); -+ pthread_mutex_unlock(&inode->plock_mutex); -+ - res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? errno : 0); - } -@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { - .releasedir = lo_releasedir, - .fsyncdir = lo_fsyncdir, - .create = lo_create, -+ .getlk = lo_getlk, -+ .setlk = lo_setlk, - .open = lo_open, - .release = lo_release, - .flush = lo_flush, -@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) - struct lo_data lo = { - .debug = 0, - .writeback = 0, -+ .posix_lock = 1, - .proc_self_fd = -1, - }; - struct lo_map_elem *root_elem; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Trim-down-imported-files.patch b/kvm-virtiofsd-Trim-down-imported-files.patch deleted file mode 100644 index f3f1e85..0000000 --- a/kvm-virtiofsd-Trim-down-imported-files.patch +++ /dev/null @@ -1,1582 +0,0 @@ -From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:44 +0100 -Subject: [PATCH 013/116] virtiofsd: Trim down imported files -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-10-dgilbert@redhat.com> -Patchwork-id: 93463 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -There's a lot of the original fuse code we don't need; trim them down. - -Signed-off-by: Dr. David Alan Gilbert -with additional trimming by: -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Xiao Yang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 71 +--- - tools/virtiofsd/fuse.h | 46 --- - tools/virtiofsd/fuse_common.h | 32 -- - tools/virtiofsd/fuse_i.h | 41 --- - tools/virtiofsd/fuse_log.h | 8 - - tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 28 -- - tools/virtiofsd/fuse_opt.h | 8 - - tools/virtiofsd/helper.c | 143 ------- - tools/virtiofsd/passthrough_helpers.h | 26 -- - tools/virtiofsd/passthrough_ll.c | 1 - - 11 files changed, 8 insertions(+), 1071 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5ab9b87..aefb7db 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - return copied; - } - --#ifdef HAVE_SPLICE --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- int splice_flags = 0; -- off_t *srcpos = 
NULL; -- off_t *dstpos = NULL; -- off_t srcpos_val; -- off_t dstpos_val; -- ssize_t res; -- size_t copied = 0; -- -- if (flags & FUSE_BUF_SPLICE_MOVE) -- splice_flags |= SPLICE_F_MOVE; -- if (flags & FUSE_BUF_SPLICE_NONBLOCK) -- splice_flags |= SPLICE_F_NONBLOCK; -- -- if (src->flags & FUSE_BUF_FD_SEEK) { -- srcpos_val = src->pos + src_off; -- srcpos = &srcpos_val; -- } -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- dstpos_val = dst->pos + dst_off; -- dstpos = &dstpos_val; -- } -- -- while (len) { -- res = splice(src->fd, srcpos, dst->fd, dstpos, len, -- splice_flags); -- if (res == -1) { -- if (copied) -- break; -- -- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -- return -errno; -- -- /* Maybe splice is not supported for this combination */ -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -- len); -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY) && -- !(dst->flags & FUSE_BUF_FD_RETRY)) { -- break; -- } -- -- len -= res; -- } -- -- return copied; --} --#else --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- (void) flags; -- -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); --} --#endif -- -- - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, - size_t len, enum fuse_buf_copy_flags flags) -@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - return fuse_buf_write(dst, dst_off, src, src_off, len); - } else if (!dst_is_fd) { - return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else if (flags & FUSE_BUF_NO_SPLICE) { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } else { -- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } - } - -diff --git 
a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 883f6e5..3202fba 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -25,10 +25,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Basic FUSE API * - * ----------------------------------------------------------- */ -@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); - void fuse_exit(struct fuse *f); - - /** -- * FUSE event loop with multiple threads -- * -- * Requests from the kernel are processed, and the appropriate -- * operations are called. Request are processed in parallel by -- * distributing them between multiple threads. -- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -- * single-threaded mode, and that you will not have to worry about reentrancy, -- * though you will have to worry about recursive lookups. In single-threaded -- * mode, FUSE will wait for one callback to return before calling another. -- * -- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -- * multiple simultaneous calls into the various callback functions given by your -- * fuse_operations record. -- * -- * If you are using multiple threads, you can enjoy all the parallel execution -- * and interactive response benefits of threads, and you get to enjoy all the -- * benefits of race conditions and locking bugs, too. Ensure that any code used -- * in the callback function of fuse_operations is also thread-safe. 
-- * -- * @param f the FUSE handle -- * @param config loop configuration -- * @return see fuse_session_loop() -- * -- * See also: fuse_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_loop_mt_31(struct fuse *f, int clone_fd); --#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) --#else --int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); --#endif -- --/** - * Get the current context - * - * The context is only valid for the duration of a filesystem -@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); - */ - int fuse_open_channel(const char *mountpoint, const char *options); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 2d686b2..bf8f8cc 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -28,10 +28,6 @@ - #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Information about an open file. - * -@@ -100,30 +96,6 @@ struct fuse_file_info { - uint32_t poll_events; - }; - --/** -- * Configuration parameters passed to fuse_session_loop_mt() and -- * fuse_loop_mt(). -- */ --struct fuse_loop_config { -- /** -- * whether to use separate device fds for each thread -- * (may increase performance) -- */ -- int clone_fd; -- -- /** -- * The maximum number of available worker threads before they -- * start to get deleted when they become idle. If not -- * specified, the default is 10. -- * -- * Adjusting this has performance implications; a very small number -- * of threads in the pool will cause a lot of thread creation and -- * deletion overhead and performance may suffer. When set to 0, a new -- * thread will be created to service every operation. 
-- */ -- unsigned int max_idle_threads; --}; -- - /************************************************************************** - * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * - **************************************************************************/ -@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - # error only API version 30 or greater is supported - #endif - --#ifdef __cplusplus --} --#endif -- - - /* - * This interface uses 64 bit off_t. -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index d38b630..b39522e 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -9,8 +9,6 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - --struct mount_opts; -- - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -45,7 +43,6 @@ struct fuse_session { - char *mountpoint; - volatile int exited; - int fd; -- struct mount_opts *mo; - int debug; - int deny_others; - struct fuse_lowlevel_ops op; -@@ -58,7 +55,6 @@ struct fuse_session { - struct fuse_req interrupts; - pthread_mutex_t lock; - int got_destroy; -- pthread_key_t pipe_key; - int broken_splice_nonblock; - uint64_t notify_ctr; - struct fuse_notify_req notify_list; -@@ -87,53 +83,16 @@ struct fuse_module { - int ctr; - }; - --/* ----------------------------------------------------------- * -- * Channel interface (when using -o clone_fd) * -- * ----------------------------------------------------------- */ -- --/** -- * Obtain counted reference to the channel -- * -- * @param ch the channel -- * @return the channel -- */ --struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -- --/** -- * Drop counted reference to a channel -- * -- * @param ch the channel -- */ --void fuse_chan_put(struct fuse_chan *ch); -- --struct mount_opts *parse_mount_opts(struct fuse_args *args); --void destroy_mount_opts(struct mount_opts *mo); --void fuse_mount_version(void); --unsigned get_max_read(struct mount_opts *o); --void 
fuse_kern_unmount(const char *mountpoint, int fd); --int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -- - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count); - void fuse_free_req(fuse_req_t req); - --void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -- --int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- struct fuse_chan *ch); - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch); - --struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); --int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); --int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); - - #define FUSE_MAX_MAX_PAGES 256 - #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -- -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 5e112e0..0af700d 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -16,10 +16,6 @@ - - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Log severity level - * -@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); - */ - void fuse_log(enum fuse_log_level level, const char *fmt, ...); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index f2d7038..e6fa247 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -16,7 +16,6 @@ - #include "fuse_kernel.h" - #include "fuse_opt.h" - #include "fuse_misc.h" --#include "mount_util.h" - - #include - #include -@@ -28,12 +27,6 @@ - #include - #include - --#ifndef 
F_LINUX_SPECIFIC_BASE --#define F_LINUX_SPECIFIC_BASE 1024 --#endif --#ifndef F_SETPIPE_SZ --#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) --#endif - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) - req->u.ni.data = NULL; - list_del_req(req); - ctr = --req->ctr; -- fuse_chan_put(req->ch); - req->ch = NULL; - pthread_mutex_unlock(&se->lock); - if (!ctr) -@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - } - -- ssize_t res = writev(ch ? ch->fd : se->fd, -- iov, count); -- int err = errno; -- -- if (res == -1) { -- assert(se != NULL); -- -- /* ENOENT means the operation was interrupted */ -- if (!fuse_session_exited(se) && err != ENOENT) -- perror("fuse: writing device"); -- return -err; -- } -- -+ abort(); /* virtio should have taken it before here */ - return 0; - } - -@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - struct fuse_bufvec *buf, - size_t len) - { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- int res; -- - /* Optimize common case */ - if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && - !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- return res; -- -- mem_buf.buf[0].mem = mbuf; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res < 0) { -- free(mbuf); -- return -res; -- } -- len = res; -- -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- -- return res; --} -- --struct fuse_ll_pipe { -- size_t size; -- int can_grow; -- int pipe[2]; --}; -- --static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) --{ -- close(llp->pipe[0]); -- close(llp->pipe[1]); -- free(llp); --} 
-- --#ifdef HAVE_SPLICE --#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) --static int fuse_pipe(int fds[2]) --{ -- int rv = pipe(fds); -- -- if (rv == -1) -- return rv; -- -- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -- close(fds[0]); -- close(fds[1]); -- rv = -1; -- } -- return rv; --} --#else --static int fuse_pipe(int fds[2]) --{ -- return pipe2(fds, O_CLOEXEC | O_NONBLOCK); --} --#endif -- --static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp == NULL) { -- int res; -- -- llp = malloc(sizeof(struct fuse_ll_pipe)); -- if (llp == NULL) -- return NULL; -- -- res = fuse_pipe(llp->pipe); -- if (res == -1) { -- free(llp); -- return NULL; -- } -- -- /* -- *the default size is 16 pages on linux -- */ -- llp->size = pagesize * 16; -- llp->can_grow = 1; -- -- pthread_setspecific(se->pipe_key, llp); -- } -- -- return llp; --} --#endif -- --static void fuse_ll_clear_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp) { -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- } --} -- --#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) --static int read_back(int fd, char *buf, size_t len) --{ -- int res; -- -- res = read(fd, buf, len); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -- return -EIO; -- } -- if (res != len) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -- return -EIO; -- } -+ abort(); /* Will have taken vhost path */ - return 0; - } - --static int grow_pipe_to_max(int pipefd) --{ -- int max; -- int res; -- int maxfd; -- char buf[32]; -- -- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -- if (maxfd < 0) 
-- return -errno; -- -- res = read(maxfd, buf, sizeof(buf) - 1); -- if (res < 0) { -- int saved_errno; -- -- saved_errno = errno; -- close(maxfd); -- return -saved_errno; -- } -- close(maxfd); -- buf[res] = '\0'; -- -- max = atoi(buf); -- res = fcntl(pipefd, F_SETPIPE_SZ, max); -- if (res < 0) -- return -errno; -- return max; --} -- --static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) --{ -- int res; -- size_t len = fuse_buf_size(buf); -- struct fuse_out_header *out = iov[0].iov_base; -- struct fuse_ll_pipe *llp; -- int splice_flags; -- size_t pipesize; -- size_t total_fd_size; -- size_t idx; -- size_t headerlen; -- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -- -- if (se->broken_splice_nonblock) -- goto fallback; -- -- if (flags & FUSE_BUF_NO_SPLICE) -- goto fallback; -- -- total_fd_size = 0; -- for (idx = buf->idx; idx < buf->count; idx++) { -- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -- total_fd_size = buf->buf[idx].size; -- if (idx == buf->idx) -- total_fd_size -= buf->off; -- } -- } -- if (total_fd_size < 2 * pagesize) -- goto fallback; -- -- if (se->conn.proto_minor < 14 || -- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- -- headerlen = iov_length(iov, iov_count); -- -- out->len = headerlen + len; -- -- /* -- * Heuristic for the required pipe size, does not work if the -- * source contains less than page size fragments -- */ -- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -- -- if (llp->size < pipesize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -- if (res == -1) { -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- llp->can_grow = 0; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < pipesize) -- goto fallback; -- } -- -- -- res = 
vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -- if (res == -1) -- goto fallback; -- -- if (res != headerlen) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -- headerlen); -- goto clear_pipe; -- } -- -- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -- pipe_buf.buf[0].fd = llp->pipe[1]; -- -- res = fuse_buf_copy(&pipe_buf, buf, -- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -- if (res < 0) { -- if (res == -EAGAIN || res == -EINVAL) { -- /* -- * Should only get EAGAIN on kernels with -- * broken SPLICE_F_NONBLOCK support (<= -- * 2.6.35) where this error or a short read is -- * returned even if the pipe itself is not -- * full -- * -- * EINVAL might mean that splice can't handle -- * this combination of input and output. -- */ -- if (res == -EAGAIN) -- se->broken_splice_nonblock = 1; -- -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- goto fallback; -- } -- res = -res; -- goto clear_pipe; -- } -- -- if (res != 0 && res < len) { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- size_t now_len = res; -- /* -- * For regular files a short count is either -- * 1) due to EOF, or -- * 2) because of broken SPLICE_F_NONBLOCK (see above) -- * -- * For other inputs it's possible that we overflowed -- * the pipe because of small buffer fragments. -- */ -- -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- goto clear_pipe; -- -- mem_buf.buf[0].mem = mbuf; -- mem_buf.off = now_len; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res > 0) { -- char *tmpbuf; -- size_t extra_len = res; -- /* -- * Trickiest case: got more data. Need to get -- * back the data from the pipe and then fall -- * back to regular write. 
-- */ -- tmpbuf = malloc(headerlen); -- if (tmpbuf == NULL) { -- free(mbuf); -- res = ENOMEM; -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], tmpbuf, headerlen); -- free(tmpbuf); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], mbuf, now_len); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- len = now_len + extra_len; -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- return res; -- } -- free(mbuf); -- res = now_len; -- } -- len = res; -- out->len = headerlen + len; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i (splice)\n", -- (unsigned long long) out->unique, out->len); -- } -- -- splice_flags = 0; -- if ((flags & FUSE_BUF_SPLICE_MOVE) && -- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -- splice_flags |= SPLICE_F_MOVE; -- -- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -- NULL, out->len, splice_flags); -- if (res == -1) { -- res = -errno; -- perror("fuse: splice from pipe"); -- goto clear_pipe; -- } -- if (res != out->len) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -- res, out->len); -- goto clear_pipe; -- } -- return 0; -- --clear_pipe: -- fuse_ll_clear_pipe(se); -- return res; -- --fallback: -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); --} --#else - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, - struct fuse_bufvec *buf, unsigned int flags) -@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } --#endif - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags) -@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, - if (bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -- goto out; -+ return; - } - bufv.buf[0].size = arg->size; - - se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -- --out: -- /* Need to reset the pipe if ->write_buf() didn't consume all data */ -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- unsigned max_read_mo = get_max_read(se->mo); -- if (se->conn.max_read != max_read_mo) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -- "requested different maximum read size (%u vs %u)\n", -- se->conn.max_read, max_read_mo); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- - if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { - se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; - } -@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, - } - out: - free(rreq); -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -@@ -2496,7 +2113,6 @@ static struct { - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, - [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, - [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) - return fuse_ll_ops[opcode].name; - } - --static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -- struct fuse_bufvec *src) --{ -- ssize_t res = fuse_buf_copy(dst, src, 0); -- if 
(res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -- return res; -- } -- if ((size_t)res < fuse_buf_size(dst)) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- return -1; -- } -- return 0; --} -- - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch) - { -- const size_t write_header_size = sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -- void *mbuf = NULL; - int err; -- int res; -- -- if (buf->flags & FUSE_BUF_IS_FD) { -- if (buf->size < tmpbuf.buf[0].size) -- tmpbuf.buf[0].size = buf->size; - -- mbuf = malloc(tmpbuf.buf[0].size); -- if (mbuf == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -- goto clear_pipe; -- } -- tmpbuf.buf[0].mem = mbuf; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- if (res < 0) -- goto clear_pipe; -- -- in = mbuf; -- } else { -- in = buf->mem; -- } -+ in = buf->mem; - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, - }; - - fuse_send_msg(se, ch, &iov, 1); -- goto clear_pipe; -+ return; - } - - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; -- req->ch = ch ? 
fuse_chan_get(ch) : NULL; -+ req->ch = ch; - - err = EIO; - if (!se->got_init) { -@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - fuse_reply_err(intr, EAGAIN); - } - -- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -- (in->opcode != FUSE_WRITE || !se->op.write_buf) && -- in->opcode != FUSE_NOTIFY_REPLY) { -- void *newmbuf; -- -- err = ENOMEM; -- newmbuf = realloc(mbuf, buf->size); -- if (newmbuf == NULL) -- goto reply_err; -- mbuf = newmbuf; -- -- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- err = -res; -- if (res < 0) -- goto reply_err; -- -- in = mbuf; -- } -- - inarg = (void *) &in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) - do_write_buf(req, in->nodeid, inarg, buf); -@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, - else - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - --out_free: -- free(mbuf); - return; - - reply_err: - fuse_reply_err(req, err); --clear_pipe: -- if (buf->flags & FUSE_BUF_IS_FD) -- fuse_ll_clear_pipe(se); -- goto out_free; - } - - #define LL_OPTION(n,o,v) \ -@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) - { - printf("using FUSE kernel interface version %i.%i\n", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -- fuse_mount_version(); - } - - void fuse_lowlevel_help(void) -@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) - /* These are not all options, but the ones that are - potentially of interest to an end-user */ - printf( --" -o allow_other allow access by all users\n" - " -o allow_root allow access by root\n" --" -o auto_unmount auto unmount on process termination\n"); -+); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- struct fuse_ll_pipe *llp; -- - if (se->got_init && !se->got_destroy) { - if (se->op.destroy) - se->op.destroy(se->userdata); - } -- llp = 
pthread_getspecific(se->pipe_key); -- if (llp != NULL) -- fuse_ll_pipe_free(llp); -- pthread_key_delete(se->pipe_key); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) - close(se->fd); -- destroy_mount_opts(se->mo); - free(se); - } - - --static void fuse_ll_pipe_destructor(void *data) --{ -- struct fuse_ll_pipe *llp = data; -- fuse_ll_pipe_free(llp); --} -- --int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) --{ -- return fuse_session_receive_buf_int(se, buf, NULL); --} -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- struct fuse_chan *ch) --{ -- int err; -- ssize_t res; --#ifdef HAVE_SPLICE -- size_t bufsize = se->bufsize; -- struct fuse_ll_pipe *llp; -- struct fuse_buf tmpbuf; -- -- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- if (llp->size < bufsize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -- if (res == -1) { -- llp->can_grow = 0; -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < bufsize) -- goto fallback; -- } -- -- res = splice(ch ? 
ch->fd : se->fd, -- NULL, llp->pipe[1], NULL, bufsize, 0); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- -- if (res == -1) { -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- if (err != EINTR && err != EAGAIN) -- perror("fuse: splice from device"); -- return -err; -- } -- -- if (res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -- return -EIO; -- } -- -- tmpbuf = (struct fuse_buf) { -- .size = res, -- .flags = FUSE_BUF_IS_FD, -- .fd = llp->pipe[0], -- }; -- -- /* -- * Don't bother with zero copy for small requests. -- * fuse_loop_mt() needs to check for FORGET so this more than -- * just an optimization. -- */ -- if (res < sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in) + pagesize) { -- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -- struct fuse_bufvec dst = { .count = 1 }; -- -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- buf->size = se->bufsize; -- buf->flags = 0; -- dst.buf[0] = *buf; -- -- res = fuse_buf_copy(&dst, &src, 0); -- if (res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -- strerror(-res)); -- fuse_ll_clear_pipe(se); -- return res; -- } -- if (res < tmpbuf.size) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- fuse_ll_clear_pipe(se); -- return -EIO; -- } -- assert(res == tmpbuf.size); -- -- } else { -- /* Don't overwrite buf->mem, as that would cause a leak */ -- buf->fd = tmpbuf.fd; -- buf->flags = tmpbuf.flags; -- } -- buf->size = tmpbuf.size; -- -- return res; -- --fallback: --#endif -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- --restart: 
-- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- if (res == -1) { -- /* ENOENT means the operation was interrupted, it's safe -- to restart */ -- if (err == ENOENT) -- goto restart; -- -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- /* Errors occurring during normal operation: EINTR (read -- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -- umounted) */ -- if (err != EINTR && err != EAGAIN) -- perror("fuse: reading device"); -- return -err; -- } -- if ((size_t) res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -- return -EIO; -- } -- -- buf->size = res; -- -- return res; --} -- - struct fuse_session *fuse_session_new(struct fuse_args *args, - const struct fuse_lowlevel_ops *op, - size_t op_size, void *userdata) - { -- int err; - struct fuse_session *se; -- struct mount_opts *mo; - - if (sizeof(struct fuse_lowlevel_ops) < op_size) { - fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /* Parse options */ - if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) - goto out2; -- if(se->deny_others) { -- /* Allowing access only by root is done by instructing -- * kernel to allow access by everyone, and then restricting -- * access to root and mountpoint owner in libfuse. -- */ -- // We may be adding the option a second time, but -- // that doesn't hurt. 
-- if(fuse_opt_add_arg(args, "-oallow_other") == -1) -- goto out2; -- } -- mo = parse_mount_opts(args); -- if (mo == NULL) -- goto out3; -- - if(args->argc == 1 && - args->argv[0][0] == '-') { - fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -- - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + - FUSE_BUFFER_HEADER_SIZE; - -@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - -- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -- if (err) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -- strerror(err)); -- goto out5; -- } -- - memcpy(&se->op, op, op_size); - se->owner = getuid(); - se->userdata = userdata; - -- se->mo = mo; - return se; - --out5: -- pthread_mutex_destroy(&se->lock); - out4: - fuse_opt_free_args(args); --out3: -- free(mo); - out2: - free(se); - out1: -@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) - - void fuse_session_unmount(struct fuse_session *se) - { -- if (se->mountpoint != NULL) { -- fuse_kern_unmount(se->mountpoint, se->fd); -- free(se->mountpoint); -- se->mountpoint = NULL; -- } - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 18c6363..6b1adfc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -31,10 +31,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Miscellaneous definitions * - * ----------------------------------------------------------- */ -@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); - * ----------------------------------------------------------- */ - - 
struct fuse_cmdline_opts { -- int singlethread; - int foreground; - int debug; - int nodefault_subtype; - char *mountpoint; - int show_version; - int show_help; -- int clone_fd; - unsigned int max_idle_threads; - }; - -@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); - int fuse_session_loop(struct fuse_session *se); - - /** -- * Enter a multi-threaded event loop. -- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * @param se the session -- * @param config session loop configuration -- * @return see fuse_session_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); --#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) --#else --int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); --#endif -- --/** - * Flag a session as terminated. 
- * - * This function is invoked by the POSIX signal handlers, when -@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, - */ - int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index d8573e7..6910255 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -14,10 +14,6 @@ - * This file defines the option parsing interface of FUSE - */ - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Option description - * -@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); - */ - int fuse_opt_match(const struct fuse_opt opts[], const char *opt); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 64ff7ad..5a2e64c 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("-s", singlethread), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), --#ifndef __FreeBSD__ - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), --#endif -- FUSE_HELPER_OPT("clone_fd", clone_fd), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_OPT_END - }; -@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -- " -s disable multi-threaded operation\n" -- " -o clone_fd use separate fuse device fd for each thread\n" -- " (may improve performance)\n" - " -o max_idle_threads the maximum number of idle worker threads\n" - " allowed (default: 10)\n"); - 
} -@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, - } - } - --/* Under FreeBSD, there is no subtype option so this -- function actually sets the fsname */ --static int add_default_subtype(const char *progname, struct fuse_args *args) --{ -- int res; -- char *subtype_opt; -- -- const char *basename = strrchr(progname, '/'); -- if (basename == NULL) -- basename = progname; -- else if (basename[1] != '\0') -- basename++; -- -- subtype_opt = (char *) malloc(strlen(basename) + 64); -- if (subtype_opt == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } --#ifdef __FreeBSD__ -- sprintf(subtype_opt, "-ofsname=%s", basename); --#else -- sprintf(subtype_opt, "-osubtype=%s", basename); --#endif -- res = fuse_opt_add_arg(args, subtype_opt); -- free(subtype_opt); -- return res; --} -- - int fuse_parse_cmdline(struct fuse_args *args, - struct fuse_cmdline_opts *opts) - { -@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, - fuse_helper_opt_proc) == -1) - return -1; - -- /* *Linux*: if neither -o subtype nor -o fsname are specified, -- set subtype to program's basename. -- *FreeBSD*: if fsname is not specified, set to program's -- basename. 
*/ -- if (!opts->nodefault_subtype) -- if (add_default_subtype(args->argv[0], args) == -1) -- return -1; -- - return 0; - } - -@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) - return 0; - } - --int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *user_data) --{ -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse *fuse; -- struct fuse_cmdline_opts opts; -- int res; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- -- if (opts.show_version) { -- printf("FUSE library version %s\n", PACKAGE_VERSION); -- fuse_lowlevel_version(); -- res = 0; -- goto out1; -- } -- -- if (opts.show_help) { -- if(args.argv[0][0] != '\0') -- printf("usage: %s [options] \n\n", -- args.argv[0]); -- printf("FUSE options:\n"); -- fuse_cmdline_help(); -- fuse_lib_help(&args); -- res = 0; -- goto out1; -- } -- -- if (!opts.show_help && -- !opts.mountpoint) { -- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -- res = 2; -- goto out1; -- } -- -- -- fuse = fuse_new_31(&args, op, op_size, user_data); -- if (fuse == NULL) { -- res = 3; -- goto out1; -- } -- -- if (fuse_mount(fuse,opts.mountpoint) != 0) { -- res = 4; -- goto out2; -- } -- -- if (fuse_daemonize(opts.foreground) != 0) { -- res = 5; -- goto out3; -- } -- -- struct fuse_session *se = fuse_get_session(fuse); -- if (fuse_set_signal_handlers(se) != 0) { -- res = 6; -- goto out3; -- } -- -- if (opts.singlethread) -- res = fuse_loop(fuse); -- else { -- struct fuse_loop_config loop_config; -- loop_config.clone_fd = opts.clone_fd; -- loop_config.max_idle_threads = opts.max_idle_threads; -- res = fuse_loop_mt_32(fuse, &loop_config); -- } -- if (res) -- res = 7; -- -- fuse_remove_signal_handlers(se); --out3: -- fuse_unmount(fuse); --out2: -- fuse_destroy(fuse); --out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -- return res; --} -- -- - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, - struct fuse_conn_info *conn) - 
{ -@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) - } - return opts; - } -- --int fuse_open_channel(const char *mountpoint, const char* options) --{ -- struct mount_opts *opts = NULL; -- int fd = -1; -- const char *argv[] = { "", "-o", options }; -- int argc = sizeof(argv) / sizeof(argv[0]); -- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -- -- opts = parse_mount_opts(&args); -- if (opts == NULL) -- return -1; -- -- fd = fuse_kern_mount(mountpoint, opts); -- destroy_mount_opts(opts); -- -- return fd; --} -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 6b77c33..7c5f561 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, - res = symlinkat(link, dirfd, path); - } else if (S_ISFIFO(mode)) { - res = mkfifoat(dirfd, path, mode); --#ifdef __FreeBSD__ -- } else if (S_ISSOCK(mode)) { -- struct sockaddr_un su; -- int fd; -- -- if (strlen(path) >= sizeof(su.sun_path)) { -- errno = ENAMETOOLONG; -- return -1; -- } -- fd = socket(AF_UNIX, SOCK_STREAM, 0); -- if (fd >= 0) { -- /* -- * We must bind the socket to the underlying file -- * system to create the socket file, even though -- * we'll never listen on this socket. 
-- */ -- su.sun_family = AF_UNIX; -- strncpy(su.sun_path, path, sizeof(su.sun_path)); -- res = bindat(dirfd, fd, (struct sockaddr*)&su, -- sizeof(su)); -- if (res == 0) -- close(fd); -- } else { -- res = -1; -- } --#endif - } else { - res = mknodat(dirfd, path, mode, rdev); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e1a6056..e5f7115 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) - ret = 0; - goto err_out1; - } else if (opts.show_version) { -- printf("FUSE library version %s\n", fuse_pkgversion()); - fuse_lowlevel_version(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Trim-out-compatibility-code.patch b/kvm-virtiofsd-Trim-out-compatibility-code.patch deleted file mode 100644 index 411af77..0000000 --- a/kvm-virtiofsd-Trim-out-compatibility-code.patch +++ /dev/null @@ -1,545 +0,0 @@ -From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:51 +0100 -Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-17-dgilbert@redhat.com> -Patchwork-id: 93468 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -virtiofsd only supports major=7, minor>=31; trim out a lot of -old compatibility code. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- - 1 file changed, 119 insertions(+), 211 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 07fb8a6..514d79c 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { - struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(arg); -- -- /* -- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- * negative entry -- */ -- if (!e->ino && req->se->conn.proto_minor < 4) { -- return fuse_reply_err(req, ENOENT); -- } -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - fill_entry(&arg, e); -@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - const struct fuse_file_info *f) - { - char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(struct fuse_entry_out); -+ size_t entrysize = sizeof(struct fuse_entry_out); - struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; - struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, - double attr_timeout) - { - struct fuse_attr_out arg; -- size_t size = -- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - arg.attr_valid = calc_timeout_sec(attr_timeout); -@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { - struct fuse_statfs_out arg; -- size_t size = -- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - convert_statfs(stbuf, &arg.st); -@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, - iov[count].iov_len = sizeof(arg); - count++; - -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } - -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; - } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) { -- goto enomem; -- } - -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; - } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) { -- goto enomem; -- } 
- -- iov[count].iov_base = (void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; - } - - res = send_reply_iov(req, 0, iov, count); -@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_file_info *fip = NULL; - struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; - } - - if (req->se->op.getattr) { -@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; - char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mknod) { - req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { - struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { - req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + sizeof(struct fuse_open_in); -- 
} -+ req->ctx.umask = arg->umask; - - req->se->op.create(req, nodeid, name, arg->mode, &fi); - } else { -@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; - req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); - } else { - fuse_reply_err(req, ENOSYS); -@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); - } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); - if 
(bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) { -- fi.lock_owner = arg->lock_owner; -- } -+ fi.lock_owner = arg->lock_owner; - - if (req->se->op.flush) { - req->se->op.flush(req, nodeid, &fi); -@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; - fi.lock_owner = arg->lock_owner; -@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -+ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { - req->ioctl_64bit = 1; - } - -@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.major = FUSE_KERNEL_VERSION; - outarg.minor = FUSE_KERNEL_MINOR_VERSION; - -- if (arg->major < 7) { -+ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { - fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", - arg->major, arg->minor); - fuse_reply_err(req, EPROTO); -@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) { -- se->conn.max_readahead = 
arg->max_readahead; -- } -- if (arg->flags & FUSE_ASYNC_READ) { -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- } -- if (arg->flags & FUSE_POSIX_LOCKS) { -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- } -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- } -- if (arg->flags & FUSE_EXPORT_SUPPORT) { -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- } -- if (arg->flags & FUSE_DONT_MASK) { -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- } -- if (arg->flags & FUSE_FLOCK_LOCKS) { -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- } -- if (arg->flags & FUSE_AUTO_INVAL_DATA) { -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- } -- if (arg->flags & FUSE_DO_READDIRPLUS) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- } -- if (arg->flags & FUSE_READDIRPLUS_AUTO) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- } -- if (arg->flags & FUSE_ASYNC_DIO) { -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- } -- if (arg->flags & FUSE_WRITEBACK_CACHE) { -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- } -- if (arg->flags & FUSE_PARALLEL_DIROPS) { -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- } -- if (arg->flags & FUSE_POSIX_ACL) { -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- } -- if (arg->flags & FUSE_HANDLE_KILLPRIV) { -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- } -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- } -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; - } -- } else { -- se->conn.max_readahead = 0; - } -- -- if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) { -- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; -- } -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; - - /* - * Default settings for modern filesystems. -@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - outarg.max_readahead = se->conn.max_readahead; - outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) { -- se->conn.max_background = (1 << 16) - 1; -- } -- if (se->conn.congestion_threshold > se->conn.max_background) { -- se->conn.congestion_threshold = se->conn.max_background; -- } -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; - } -- if (se->conn.proto_minor >= 23) { -- outarg.time_gran = se->conn.time_gran; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; - } - -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ outarg.time_gran = se->conn.time_gran; -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, - outarg.minor); -@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.congestion_threshold); - fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - } -- if (arg->minor < 5) { -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- } else if (arg->minor < 23) { -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- } - - send_reply_ok(req, &outarg, outargsize); - } -@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - return 
-EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.ino = ino; - outarg.off = off; - outarg.len = len; -@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.namelen = namelen; - outarg.padding = 0; -@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.child = child; - outarg.namelen = namelen; -@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- - out.unique = 0; - out.error = FUSE_NOTIFY_STORE; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch deleted file mode 100644 index a0882d5..0000000 --- a/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +++ /dev/null @@ -1,93 +0,0 @@ -From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:17 +0100 -Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-103-dgilbert@redhat.com> -Patchwork-id: 93557 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -Define fuse_buf_writev() which use pwritev and writev to improve io -bandwidth. Especially, the src bufs with 0 size should be skipped as -their mems are not *block_size* aligned which will cause writev failed -in direct io mode. - -Signed-off-by: Jun Piao -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 38 insertions(+) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 42a608f..37befeb 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - #include - #include -+#include - #include - #include - -@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - -+__attribute__((unused)) -+static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, -+ struct fuse_bufvec *in_buf) -+{ -+ ssize_t res, i, j; -+ size_t iovcnt = in_buf->count; -+ struct iovec *iov; -+ int fd = out_buf->fd; -+ -+ iov = calloc(iovcnt, sizeof(struct iovec)); -+ if (!iov) { -+ return -ENOMEM; -+ } -+ -+ for (i = 0, j = 0; i < iovcnt; i++) { -+ /* Skip the buf with 0 size */ -+ if (in_buf->buf[i].size) { -+ iov[j].iov_base = in_buf->buf[i].mem; -+ iov[j].iov_len = in_buf->buf[i].size; -+ j++; -+ } -+ } -+ -+ if (out_buf->flags & FUSE_BUF_FD_SEEK) { -+ res = pwritev(fd, iov, iovcnt, out_buf->pos); -+ } else { -+ res = writev(fd, iov, iovcnt); -+ } -+ -+ if (res == -1) { -+ res = -errno; -+ } -+ -+ 
free(iov); -+ return res; -+} -+ - static size_t min_size(size_t s1, size_t s2) - { - return s1 < s2 ? s1 : s2; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch deleted file mode 100644 index 451f12b..0000000 --- a/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:07 +0100 -Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-33-dgilbert@redhat.com> -Patchwork-id: 93487 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Although --socket-path=PATH is useful for manual invocations, management -tools typically create the UNIX domain socket themselves and pass it to -the vhost-user device backend. This way QEMU can be launched -immediately with a valid socket. No waiting for the vhost-user device -backend is required when fd passing is used. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- - tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ - 3 files changed, 38 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1126723..45995f3 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -68,6 +68,7 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; - }; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 4f4684d..95f4db8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { - LL_OPTION("--debug", debug, 1), - LL_OPTION("allow_root", deny_others, 1), - LL_OPTION("--socket-path=%s", vu_socket_path, 0), -+ LL_OPTION("--fd=%d", vu_listen_fd, 0), - FUSE_OPT_END - }; - -@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) - */ - printf( - " -o allow_root allow access by root\n" -- " --socket-path=PATH path for the vhost-user socket\n"); -+ " --socket-path=PATH path for the vhost-user socket\n" -+ " --fd=FDNUM fd number of vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out1; - } - se->fd = -1; -+ se->vu_listen_fd = -1; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (!se->vu_socket_path) { -- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ if (!se->vu_socket_path && se->vu_listen_fd < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd 
option\n"); -+ goto out4; -+ } -+ if (se->vu_socket_path && se->vu_listen_fd >= 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: --socket-path and --fd cannot be given together\n"); - goto out4; - } - -@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) - - int fuse_lowlevel_is_virtio(struct fuse_session *se) - { -- return se->vu_socket_path != NULL; -+ return !!se->virtio_dev; - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7e2711b..635f877 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - --int virtio_session_mount(struct fuse_session *se) -+static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; - mode_t old_umask; - -+ /* Nothing to do if fd is already initialized */ -+ if (se->vu_listen_fd >= 0) { -+ return 0; -+ } -+ - if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { - fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); - return -1; - } - -- se->fd = -1; -- - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge -@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -+ se->vu_listen_fd = listen_sock; -+ return 0; -+} -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ int ret; -+ -+ ret = fv_create_listen_socket(se); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ se->fd = -1; -+ - fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", - __func__); -- int data_sock = accept(listen_sock, NULL, NULL); -+ int data_sock = accept(se->vu_listen_fd, NULL, NULL); - if (data_sock == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -- close(listen_sock); -+ close(se->vu_listen_fd); - return -1; - } -- close(listen_sock); -+ close(se->vu_listen_fd); -+ se->vu_listen_fd = -1; - fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket 
connection\n", - __func__); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch deleted file mode 100644 index b874dc9..0000000 --- a/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:21 +0100 -Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-47-dgilbert@redhat.com> -Patchwork-id: 93502 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce an API for consuming bytes from a buffer with size checks. -All FUSE operations will be converted to use this safe API instead of -void *inarg. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 76 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 772efa9..42a608f 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - - return copied; - } -+ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) -+{ -+ void *ptr; -+ -+ if (len > iter->size - iter->pos) { -+ return NULL; -+ } -+ -+ ptr = iter->mem + iter->pos; -+ iter->pos += len; -+ return ptr; -+} -+ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) -+{ -+ const char *str = iter->mem + iter->pos; -+ size_t remaining = iter->size - iter->pos; -+ size_t i; -+ -+ for (i = 0; i < remaining; i++) { -+ if (str[i] == '\0') { -+ iter->pos += i + 1; -+ return str; -+ } -+ } -+ return NULL; -+} -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 0cb33ac..f8f6433 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - -+/** -+ * Memory buffer iterator -+ * -+ */ -+struct fuse_mbuf_iter { -+ /** -+ * Data pointer -+ */ -+ void *mem; -+ -+ /** -+ * Total length, in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Offset from start of buffer -+ */ -+ size_t pos; -+}; -+ -+/* Initialize memory buffer iterator from a fuse_buf */ -+#define FUSE_MBUF_ITER_INIT(fbuf) \ -+ ((struct fuse_mbuf_iter){ \ -+ .mem = fbuf->mem, \ -+ .size = fbuf->size, \ -+ .pos = 0, \ -+ }) -+ -+/** -+ * Consume bytes from a memory buffer iterator -+ * -+ * 
@param iter memory buffer iterator -+ * @param len number of bytes to consume -+ * @return pointer to start of consumed bytes or -+ * NULL if advancing beyond end of buffer -+ */ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); -+ -+/** -+ * Consume a NUL-terminated string from a memory buffer iterator -+ * -+ * @param iter memory buffer iterator -+ * @return pointer to the string or -+ * NULL if advancing beyond end of buffer or there is no NUL-terminator -+ */ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); -+ - /* - * Signal handling - */ -- - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch deleted file mode 100644 index bdef115..0000000 --- a/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:58 +0100 -Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-84-dgilbert@redhat.com> -Patchwork-id: 93538 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This offers an helper function for lo_data's cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- - 1 file changed, 21 insertions(+), 16 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 056ebe8..e8dc5c7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) - return la->ino == lb->ino && la->dev == lb->dev; - } - -+static void fuse_lo_data_cleanup(struct lo_data *lo) -+{ -+ if (lo->inodes) { -+ g_hash_table_destroy(lo->inodes); -+ } -+ lo_map_destroy(&lo->fd_map); -+ lo_map_destroy(&lo->dirp_map); -+ lo_map_destroy(&lo->ino_map); -+ -+ if (lo->proc_self_fd >= 0) { -+ close(lo->proc_self_fd); -+ } -+ -+ if (lo->root.fd >= 0) { -+ close(lo->root.fd); -+ } -+ -+ free(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2554,22 +2574,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -- if (lo.inodes) { -- g_hash_table_destroy(lo.inodes); -- } -- lo_map_destroy(&lo.fd_map); -- lo_map_destroy(&lo.dirp_map); -- lo_map_destroy(&lo.ino_map); -- -- if (lo.proc_self_fd >= 0) { -- close(lo.proc_self_fd); -- } -- -- if (lo.root.fd >= 0) { -- close(lo.root.fd); -- } -- -- free(lo.source); -+ fuse_lo_data_cleanup(&lo); - - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch deleted file mode 100644 index 5e81663..0000000 --- a/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +++ /dev/null @@ -1,46 +0,0 @@ -From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:55 +0100 -Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-21-dgilbert@redhat.com> -Patchwork-id: 93474 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The -o source=PATH option will be used by most command-line invocations. -Let's document it! - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 26ac870..fc9b264 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -+ printf(" -o source=PATH shared directory tree\n"); - fuse_lowlevel_help(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-print-capabilities-option.patch b/kvm-virtiofsd-add-print-capabilities-option.patch deleted file mode 100644 index b57e408..0000000 --- a/kvm-virtiofsd-add-print-capabilities-option.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:10 +0100 -Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-36-dgilbert@redhat.com> -Patchwork-id: 93486 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add the --print-capabilities option as per vhost-user.rst "Backend -programs conventions". Currently there are no advertised features. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) -Signed-off-by: Miroslav Rezanina ---- - docs/interop/vhost-user.json | 4 +++- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ - 4 files changed, 18 insertions(+), 1 deletion(-) - -diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json -index da6aaf5..d4ea1f7 100644 ---- a/docs/interop/vhost-user.json -+++ b/docs/interop/vhost-user.json -@@ -31,6 +31,7 @@ - # @rproc-serial: virtio remoteproc serial link - # @scsi: virtio scsi - # @vsock: virtio vsock transport -+# @fs: virtio fs (since 4.2) - # - # Since: 4.0 - ## -@@ -50,7 +51,8 @@ - 'rpmsg', - 'rproc-serial', - 'scsi', -- 'vsock' -+ 'vsock', -+ 'fs' - ] - } - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f6b3470..0d61df8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { - int nodefault_subtype; - int show_version; - int show_help; -+ int print_capabilities; - unsigned int max_idle_threads; - }; - -diff --git 
a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index a3645fc..b8ec5ac 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("--help", show_help), - FUSE_HELPER_OPT("-V", show_version), - FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), - FUSE_HELPER_OPT("-d", debug), - FUSE_HELPER_OPT("debug", debug), - FUSE_HELPER_OPT("-d", foreground), -@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) - { - printf(" -h --help print help\n" - " -V --version print version\n" -+ " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 037c5d7..cd27c09 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { - .lseek = lo_lseek, - }; - -+/* Print vhost-user.json backend program capabilities */ -+static void print_capabilities(void) -+{ -+ printf("{\n"); -+ printf(" \"type\": \"fs\"\n"); -+ printf("}\n"); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) - fuse_lowlevel_version(); - ret = 0; - goto err_out1; -+ } else if (opts.print_capabilities) { -+ print_capabilities(); -+ ret = 0; -+ goto err_out1; - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-seccomp-whitelist.patch b/kvm-virtiofsd-add-seccomp-whitelist.patch deleted file mode 100644 index b34108e..0000000 --- a/kvm-virtiofsd-add-seccomp-whitelist.patch +++ /dev/null @@ -1,285 +0,0 @@ -From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:30 +0100 -Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-56-dgilbert@redhat.com> -Patchwork-id: 93511 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Only allow system calls that are needed by virtiofsd. All other system -calls cause SIGSYS to be directed at the thread and the process will -coredump. - -Restricting system calls reduces the kernel attack surface and limits -what the process can do when compromised. - -Signed-off-by: Stefan Hajnoczi -with additional entries by: -Signed-off-by: Ganesh Maharaj Mahalingam -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Misono Tomohiro -Signed-off-by: piaojun -Signed-off-by: Vivek Goyal -Signed-off-by: Eric Ren -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) - -Signed-off-by: Miroslav Rezanina ---- - Makefile | 5 +- - tools/virtiofsd/Makefile.objs | 5 +- - tools/virtiofsd/passthrough_ll.c | 2 + - tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/seccomp.h | 14 ++++ - 5 files changed, 174 insertions(+), 3 deletions(-) - create mode 100644 tools/virtiofsd/seccomp.c - create mode 100644 tools/virtiofsd/seccomp.h - -diff --git a/Makefile b/Makefile -index 0e9755d..6879a06 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifdef CONFIG_LINUX -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - --ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+# relies on Linux-specific syscalls -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -index 45a8075..076f667 100644 ---- a/tools/virtiofsd/Makefile.objs -+++ b/tools/virtiofsd/Makefile.objs -@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ - fuse_signals.o \ - fuse_virtio.o \ - helper.o \ -- passthrough_ll.o -+ passthrough_ll.o \ -+ seccomp.o - -+seccomp.o-cflags := $(SECCOMP_CFLAGS) -+seccomp.o-libs := $(SECCOMP_LIBS) -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0947d14..bd8925b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -59,6 +59,7 @@ - #include - - #include "passthrough_helpers.h" -+#include "seccomp.h" - - struct lo_map_elem { - union { -@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct 
fuse_session *se) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -+ setup_seccomp(); - } - - int main(int argc, char *argv[]) -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -new file mode 100644 -index 0000000..691fb63 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.c -@@ -0,0 +1,151 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "seccomp.h" -+#include "fuse_i.h" -+#include "fuse_log.h" -+#include -+#include -+#include -+#include -+ -+/* Bodge for libseccomp 2.4.2 which broke ppoll */ -+#if !defined(__SNR_ppoll) && defined(__SNR_brk) -+#ifdef __NR_ppoll -+#define __SNR_ppoll __NR_ppoll -+#else -+#define __SNR_ppoll __PNR_ppoll -+#endif -+#endif -+ -+static const int syscall_whitelist[] = { -+ /* TODO ireg sem*() syscalls */ -+ SCMP_SYS(brk), -+ SCMP_SYS(capget), /* For CAP_FSETID */ -+ SCMP_SYS(capset), -+ SCMP_SYS(clock_gettime), -+ SCMP_SYS(clone), -+#ifdef __NR_clone3 -+ SCMP_SYS(clone3), -+#endif -+ SCMP_SYS(close), -+ SCMP_SYS(copy_file_range), -+ SCMP_SYS(dup), -+ SCMP_SYS(eventfd2), -+ SCMP_SYS(exit), -+ SCMP_SYS(exit_group), -+ SCMP_SYS(fallocate), -+ SCMP_SYS(fchmodat), -+ SCMP_SYS(fchownat), -+ SCMP_SYS(fcntl), -+ SCMP_SYS(fdatasync), -+ SCMP_SYS(fgetxattr), -+ SCMP_SYS(flistxattr), -+ SCMP_SYS(flock), -+ SCMP_SYS(fremovexattr), -+ SCMP_SYS(fsetxattr), -+ SCMP_SYS(fstat), -+ SCMP_SYS(fstatfs), -+ SCMP_SYS(fsync), -+ SCMP_SYS(ftruncate), -+ SCMP_SYS(futex), -+ SCMP_SYS(getdents), -+ SCMP_SYS(getdents64), -+ SCMP_SYS(getegid), -+ SCMP_SYS(geteuid), -+ SCMP_SYS(getpid), -+ SCMP_SYS(gettid), -+ SCMP_SYS(gettimeofday), -+ SCMP_SYS(linkat), -+ SCMP_SYS(lseek), -+ SCMP_SYS(madvise), -+ SCMP_SYS(mkdirat), -+ SCMP_SYS(mknodat), -+ SCMP_SYS(mmap), -+ SCMP_SYS(mprotect), -+ SCMP_SYS(mremap), -+ SCMP_SYS(munmap), -+ SCMP_SYS(newfstatat), -+ SCMP_SYS(open), -+ SCMP_SYS(openat), -+ SCMP_SYS(ppoll), 
-+ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ -+ SCMP_SYS(preadv), -+ SCMP_SYS(pread64), -+ SCMP_SYS(pwritev), -+ SCMP_SYS(pwrite64), -+ SCMP_SYS(read), -+ SCMP_SYS(readlinkat), -+ SCMP_SYS(recvmsg), -+ SCMP_SYS(renameat), -+ SCMP_SYS(renameat2), -+ SCMP_SYS(rt_sigaction), -+ SCMP_SYS(rt_sigprocmask), -+ SCMP_SYS(rt_sigreturn), -+ SCMP_SYS(sendmsg), -+ SCMP_SYS(setresgid), -+ SCMP_SYS(setresuid), -+#ifdef __NR_setresgid32 -+ SCMP_SYS(setresgid32), -+#endif -+#ifdef __NR_setresuid32 -+ SCMP_SYS(setresuid32), -+#endif -+ SCMP_SYS(set_robust_list), -+ SCMP_SYS(symlinkat), -+ SCMP_SYS(time), /* Rarely needed, except on static builds */ -+ SCMP_SYS(tgkill), -+ SCMP_SYS(unlinkat), -+ SCMP_SYS(utimensat), -+ SCMP_SYS(write), -+ SCMP_SYS(writev), -+}; -+ -+void setup_seccomp(void) -+{ -+ scmp_filter_ctx ctx; -+ size_t i; -+ -+#ifdef SCMP_ACT_KILL_PROCESS -+ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); -+ /* Handle a newer libseccomp but an older kernel */ -+ if (!ctx && errno == EOPNOTSUPP) { -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+ } -+#else -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+#endif -+ if (!ctx) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); -+ exit(1); -+ } -+ -+ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -+ syscall_whitelist[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -+ syscall_whitelist[i]); -+ exit(1); -+ } -+ } -+ -+ /* libvhost-user calls this for post-copy migration, we don't need it */ -+ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), -+ SCMP_SYS(userfaultfd), 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); -+ exit(1); -+ } -+ -+ if (seccomp_load(ctx) < 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); -+ exit(1); -+ } -+ -+ seccomp_release(ctx); -+} -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -new file mode 100644 -index 0000000..86bce72 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.h 
-@@ -0,0 +1,14 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#ifndef VIRTIOFSD_SECCOMP_H -+#define VIRTIOFSD_SECCOMP_H -+ -+void setup_seccomp(void); -+ -+#endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/kvm-virtiofsd-add-some-options-to-the-help-message.patch deleted file mode 100644 index ac6dc54..0000000 --- a/kvm-virtiofsd-add-some-options-to-the-help-message.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:26 +0100 -Subject: [PATCH 115/116] virtiofsd: add some options to the help message -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-112-dgilbert@redhat.com> -Patchwork-id: 93565 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -Add following options to the help message: -- cache -- flock|no_flock -- norace -- posix_lock|no_posix_lock -- readdirplus|no_readdirplus -- timeout -- writeback|no_writeback -- xattr|no_xattr - -Signed-off-by: Masayoshi Mizuma - -dgilbert: Split cache, norace, posix_lock, readdirplus off - into our own earlier patches that added the options - -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index f98d8f2..0801cf7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) - " -o cache= cache mode. could be one of \"auto, " - "always, none\"\n" - " default: auto\n" -+ " -o flock|no_flock enable/disable flock\n" -+ " default: no_flock\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) - " enable/disable readirplus\n" - " default: readdirplus except with " - "cache=none\n" -- ); -+ " -o timeout= I/O timeout (second)\n" -+ " default: depends on cache= option.\n" -+ " -o writeback|no_writeback enable/disable writeback cache\n" -+ " default: no_writeback\n" -+ " -o xattr|no_xattr enable/disable xattr\n" -+ " default: no_xattr\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-syslog-command-line-option.patch b/kvm-virtiofsd-add-syslog-command-line-option.patch deleted file mode 100644 index 5b55342..0000000 --- a/kvm-virtiofsd-add-syslog-command-line-option.patch +++ /dev/null @@ -1,239 +0,0 @@ -From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:36 +0100 -Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-62-dgilbert@redhat.com> -Patchwork-id: 93509 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sometimes collecting output from stderr is inconvenient or does not fit -within the overall logging architecture. Add syslog(3) support for -cases where stderr cannot be used. - -Signed-off-by: Stefan Hajnoczi -dgilbert: Reworked as a logging function -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- - tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- - tools/virtiofsd/seccomp.h | 4 +++- - 5 files changed, 76 insertions(+), 13 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 0d61df8..f2750bc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { - int show_version; - int show_help; - int print_capabilities; -+ int syslog; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5531425..9692ef9 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_HELPER_OPT("--syslog", syslog), - FUSE_OPT_END - }; - -@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) - " -V --version print 
version\n" - " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" -+ " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c281d81..0372aca 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -58,6 +58,7 @@ - #include - #include - #include -+#include - #include - - #include "passthrough_helpers.h" -@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { - { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; -+static bool use_syslog = false; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, -+ bool enable_syslog) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -- setup_seccomp(); -+ setup_seccomp(enable_syslog); - } - - /* Raise the maximum number of open file descriptors */ -@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) - } - } - -+static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) -+{ -+ if (use_syslog) { -+ int priority = LOG_ERR; -+ switch (level) { -+ case FUSE_LOG_EMERG: -+ priority = LOG_EMERG; -+ break; -+ case FUSE_LOG_ALERT: -+ priority = LOG_ALERT; -+ break; -+ case FUSE_LOG_CRIT: -+ priority = LOG_CRIT; -+ break; -+ case FUSE_LOG_ERR: -+ priority = LOG_ERR; -+ break; -+ case FUSE_LOG_WARNING: -+ priority = LOG_WARNING; -+ break; -+ case FUSE_LOG_NOTICE: -+ priority = LOG_NOTICE; -+ break; -+ case FUSE_LOG_INFO: -+ priority = LOG_INFO; -+ break; -+ case FUSE_LOG_DEBUG: -+ priority = LOG_DEBUG; -+ break; -+ } -+ vsyslog(priority, fmt, ap); -+ } else { -+ vfprintf(stderr, fmt, ap); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -+ fuse_set_log_func(log_func); -+ use_syslog = opts.syslog; -+ if (use_syslog) { -+ openlog("virtiofsd", LOG_PID, LOG_DAEMON); -+ } - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) - /* Must be before sandbox since it wants /proc */ - setup_capng(); - -- setup_sandbox(&lo, se); -+ setup_sandbox(&lo, se, opts.syslog); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 691fb63..2d9d4a7 100644 ---- a/tools/virtiofsd/seccomp.c -+++ 
b/tools/virtiofsd/seccomp.c -@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { - SCMP_SYS(writev), - }; - --void setup_seccomp(void) -+/* Syscalls used when --syslog is enabled */ -+static const int syscall_whitelist_syslog[] = { -+ SCMP_SYS(sendto), -+}; -+ -+static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) - { -- scmp_filter_ctx ctx; - size_t i; - -+ for (i = 0; i < len; i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", -+ syscalls[i]); -+ exit(1); -+ } -+ } -+} -+ -+void setup_seccomp(bool enable_syslog) -+{ -+ scmp_filter_ctx ctx; -+ - #ifdef SCMP_ACT_KILL_PROCESS - ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); - /* Handle a newer libseccomp but an older kernel */ -@@ -126,13 +143,10 @@ void setup_seccomp(void) - exit(1); - } - -- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -- syscall_whitelist[i], 0) != 0) { -- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -- syscall_whitelist[i]); -- exit(1); -- } -+ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); -+ if (enable_syslog) { -+ add_whitelist(ctx, syscall_whitelist_syslog, -+ G_N_ELEMENTS(syscall_whitelist_syslog)); - } - - /* libvhost-user calls this for post-copy migration, we don't need it */ -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -index 86bce72..d47c8ea 100644 ---- a/tools/virtiofsd/seccomp.h -+++ b/tools/virtiofsd/seccomp.h -@@ -9,6 +9,8 @@ - #ifndef VIRTIOFSD_SECCOMP_H - #define VIRTIOFSD_SECCOMP_H - --void setup_seccomp(void); -+#include -+ -+void setup_seccomp(bool enable_syslog); - - #endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch deleted file mode 100644 index 0241a9d..0000000 --- a/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +++ 
/dev/null @@ -1,106 +0,0 @@ -From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:22 +0100 -Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-108-dgilbert@redhat.com> -Patchwork-id: 93561 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add an option to control the size of the thread pool. Requests are now -processed in parallel by default. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- - tools/virtiofsd/fuse_virtio.c | 5 +++-- - 3 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1447d86..4e47e58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -72,6 +72,7 @@ struct fuse_session { - int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; -+ int thread_pool_size; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 79a4031..de2e2e0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -28,6 +28,7 @@ - #include - #include - -+#define THREAD_POOL_SIZE 64 - - #define OFFSET_MAX 0x7fffffffffffffffLL - -@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { - LL_OPTION("allow_root", deny_others, 1), - 
LL_OPTION("--socket-path=%s", vu_socket_path, 0), - LL_OPTION("--fd=%d", vu_listen_fd, 0), -+ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), - FUSE_OPT_END - }; - -@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) - printf( - " -o allow_root allow access by root\n" - " --socket-path=PATH path for the vhost-user socket\n" -- " --fd=FDNUM fd number of vhost-user socket\n"); -+ " --fd=FDNUM fd number of vhost-user socket\n" -+ " --thread-pool-size=NUM thread pool size limit (default %d)\n", -+ THREAD_POOL_SIZE); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - } - se->fd = -1; - se->vu_listen_fd = -1; -+ se->thread_pool_size = THREAD_POOL_SIZE; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 0dcf2ef..9f65823 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ struct fuse_session *se = qi->virtio_dev->se; - GThreadPool *pool; - -- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -- TRUE, NULL); -+ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, -+ NULL); - if (!pool) { - fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); - return NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-vhost-user.json-file.patch b/kvm-virtiofsd-add-vhost-user.json-file.patch deleted file mode 100644 index a24b24f..0000000 --- a/kvm-virtiofsd-add-vhost-user.json-file.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:09 +0100 -Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-35-dgilbert@redhat.com> -Patchwork-id: 93490 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Install a vhost-user.json file describing virtiofsd. This allows -libvirt and other management tools to enumerate vhost-user backend -programs. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) -Signed-off-by: Miroslav Rezanina ---- - .gitignore | 1 + - Makefile | 1 + - tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ - 3 files changed, 7 insertions(+) - create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in - -diff --git a/.gitignore b/.gitignore -index aefad32..d7a4f99 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -6,6 +6,7 @@ - /config-target.* - /config.status - /config-temp -+/tools/virtiofsd/50-qemu-virtiofsd.json - /elf2dmp - /trace-events-all - /trace/generated-events.h -diff --git a/Makefile b/Makefile -index 1526775..0e9755d 100644 ---- a/Makefile -+++ b/Makefile -@@ -332,6 +332,7 @@ endif - - ifdef CONFIG_LINUX - HELPERS-y += virtiofsd$(EXESUF) -+vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif - - # Sphinx does not allow building manuals into the same directory as -diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in -new file mode 100644 -index 0000000..9bcd86f ---- /dev/null -+++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in -@@ -0,0 +1,5 @@ -+{ -+ "description": "QEMU virtiofsd vhost-user-fs", -+ 
"type": "fs", -+ "binary": "@libexecdir@/virtiofsd" -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-cap-ng-helpers.patch b/kvm-virtiofsd-cap-ng-helpers.patch deleted file mode 100644 index 305745d..0000000 --- a/kvm-virtiofsd-cap-ng-helpers.patch +++ /dev/null @@ -1,175 +0,0 @@ -From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:32 +0100 -Subject: [PATCH 061/116] virtiofsd: cap-ng helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-58-dgilbert@redhat.com> -Patchwork-id: 93512 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's -sandboxing doesn't have /proc mounted; thus we have to do the -caps read before we sandbox it and save/restore the state. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 4 +-- - tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 74 insertions(+), 2 deletions(-) - -diff --git a/Makefile b/Makefile -index 6879a06..ff05c30 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - - # relies on Linux-specific syscalls --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index bd8925b..97e7c75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -39,6 +39,7 @@ - #include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include -+#include - #include - #include - #include -@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -+static struct { -+ pthread_mutex_t mutex; -+ void *saved; -+} cap; -+/* That we loaded cap-ng in the current thread from the saved */ -+static __thread bool cap_loaded = 0; -+ - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - - static int is_dot_or_dotdot(const char *name) -@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+/* -+ * Load capng's state from our saved state if the current thread -+ * hadn't previously been loaded. 
-+ * returns 0 on success -+ */ -+static int load_capng(void) -+{ -+ if (!cap_loaded) { -+ pthread_mutex_lock(&cap.mutex); -+ capng_restore_state(&cap.saved); -+ /* -+ * restore_state free's the saved copy -+ * so make another. -+ */ -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); -+ return -EINVAL; -+ } -+ pthread_mutex_unlock(&cap.mutex); -+ -+ /* -+ * We want to use the loaded state for our pid, -+ * not the original -+ */ -+ capng_setpid(syscall(SYS_gettid)); -+ cap_loaded = true; -+ } -+ return 0; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - } - - /* -+ * Capture the capability state, we'll need to restore this for individual -+ * threads later; see load_capng. -+ */ -+static void setup_capng(void) -+{ -+ /* Note this accesses /proc so has to happen before the sandbox */ -+ if (capng_get_caps_process()) { -+ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); -+ exit(1); -+ } -+ pthread_mutex_init(&cap.mutex, NULL); -+ pthread_mutex_lock(&cap.mutex); -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); -+ exit(1); -+ } -+ pthread_mutex_unlock(&cap.mutex); -+} -+ -+static void cleanup_capng(void) -+{ -+ free(cap.saved); -+ cap.saved = NULL; -+ pthread_mutex_destroy(&cap.mutex); -+} -+ -+ -+/* - * Make the source directory our root so symlinks cannot escape and no other - * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
- */ -@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be before sandbox since it wants /proc */ -+ setup_capng(); -+ - setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - - fuse_session_unmount(se); -+ cleanup_capng(); - err_out3: - fuse_remove_signal_handlers(se); - err_out2: --- -1.8.3.1 - diff --git a/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch deleted file mode 100644 index caa4560..0000000 --- a/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +++ /dev/null @@ -1,1111 +0,0 @@ -From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:23 +0100 -Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c - ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-49-dgilbert@redhat.com> -Patchwork-id: 93503 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Each FUSE operation involves parsing the input buffer. Currently the -code assumes the input buffer is large enough for the expected -arguments. This patch uses fuse_mbuf_iter to check the size. - -Most operations are simple to convert. Some are more complicated due to -variable-length inputs or different sizes depending on the protocol -version. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- - 1 file changed, 456 insertions(+), 125 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 611e8b0..02e1d83 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -27,7 +28,6 @@ - #include - - --#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - - struct fuse_pollhandle { -@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) - return send_reply_ok(req, &arg, sizeof(arg)); - } - --static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.lookup) { - req->se->op.lookup(req, nodeid, name); -@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; -+ struct fuse_forget_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.forget) { - req->se->op.forget(req, nodeid, arg->nlookup); -@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void 
*inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_batch_forget_in *arg = (void *)inarg; -- struct fuse_forget_one *param = (void *)PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg; -+ struct fuse_forget_data *forgets; -+ size_t scount; - - (void)nodeid; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ /* -+ * Prevent integer overflow. The compiler emits the following warning -+ * unless we use the scount local variable: -+ * -+ * error: comparison is always false due to limited range of data type -+ * [-Werror=type-limits] -+ * -+ * This may be true on 64-bit hosts but we need this check for 32-bit -+ * hosts. -+ */ -+ scount = arg->count; -+ if (scount > SIZE_MAX / sizeof(forgets[0])) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); -+ if (!forgets) { -+ fuse_reply_none(req); -+ return; -+ } -+ - if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *)param); -+ req->se->op.forget_multi(req, arg->count, forgets); - } else if (req->se->op.forget) { -+ unsigned int i; -+ - for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = ¶m[i]; - struct fuse_req *dummy_req; - - dummy_req = fuse_ll_alloc_req(req->se); -@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - dummy_req->ctx = req->ctx; - dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); - } - fuse_reply_none(req); - } else { -@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_file_info *fip = NULL; - struct 
fuse_file_info fi; - -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (arg->getattr_flags & FUSE_GETATTR_FH) { - memset(&fi, 0, sizeof(fi)); -@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -- - if (req->se->op.setattr) { -+ struct fuse_setattr_in *arg; - struct fuse_file_info *fi = NULL; - struct fuse_file_info fi_store; - struct stat stbuf; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&stbuf, 0, sizeof(stbuf)); - convert_attr(arg, &stbuf); - if (arg->valid & FATTR_FH) { -@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; -+ struct fuse_access_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.access) { - req->se->op.access(req, nodeid, arg->mask); -@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- (void)inarg; -+ (void)iter; - - if (req->se->op.readlink) { - req->se->op.readlink(req, nodeid); 
-@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - -@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; -+ struct fuse_mkdir_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ req->se->op.mkdir(req, nodeid, name, arg->mode); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.unlink) { - req->se->op.unlink(req, nodeid, name); -@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rmdir) { - req->se->op.rmdir(req, nodeid, name); -@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ const char *linkname = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name || !linkname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.symlink) { - req->se->op.symlink(req, linkname, nodeid, name); -@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; -+ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.link) { -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ req->se->op.link(req, arg->oldnodeid, nodeid, name); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; -- - if (req->se->op.create) { -+ struct fuse_create_in *arg; - struct fuse_file_info fi; -- char *name = PARAM(arg); -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - 
fi.flags = arg->flags; -@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -- - if (req->se->op.read) { -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->lock_owner; -@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; - struct fuse_file_info fi; -- char *param; -+ const char *param; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ param = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!param) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - 
fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -- param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - --static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_flush_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; - fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; - fi.lock_owner = arg->lock_owner; -+ - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; - } - - if (req->se->op.release) { -@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - 
fi.fh = arg->fh; - -@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
- } - } - --static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - (void)nodeid; -- (void)inarg; -+ (void)iter; - - if (req->se->op.statfs) { - req->se->op.statfs(req, nodeid); -@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg; -+ const char *name; -+ const char *value; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ value = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!value) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.setxattr) { - req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.getxattr) { -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ req->se->op.getxattr(req, nodeid, name, arg->size); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.listxattr) { - req->se->op.listxattr(req, nodeid, arg->size); -@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.removexattr) { - req->se->op.removexattr(req, nodeid, name); -@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, - flock->l_pid = fl->pid; - } - --static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) -+ struct fuse_mbuf_iter *iter, int sleep) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock 
flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, iter, 0); - } - --static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, iter, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - return 0; - } - --static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_interrupt_in *arg; - struct fuse_session *se = req->se; - - (void)nodeid; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", - (unsigned long long)arg->unique); -@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, - } - } - --static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; -+ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ -+ if 
(!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.bmap) { - req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -+ struct fuse_ioctl_in *arg; -+ unsigned int flags; -+ void *in_buf = NULL; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ flags = arg->flags; - if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { - fuse_reply_err(req, ENOTTY); - return; - } - -+ if (arg->in_size) { -+ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); -+ if (!in_buf) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - free(ph); - } - --static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_poll_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.poll_events = arg->events; -@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, -+ 
struct fuse_mbuf_iter *iter) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_fallocate_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -- const void *inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_copy_file_range_in *arg = -- (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_copy_file_range_in *arg; - struct fuse_file_info fi_in, fi_out; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi_in, 0, sizeof(fi_in)); - fi_in.fh = arg->fh_in; - -@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, - } - } - --static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_lseek_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); -+ struct fuse_init_in *arg; - struct fuse_init_out outarg; - struct fuse_session *se = req->se; - 
size_t bufsize = se->bufsize; - size_t outargsize = sizeof(outarg); - - (void)nodeid; -+ -+ /* First consume the old fields... */ -+ arg = fuse_mbuf_iter_advance(iter, compat_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ /* ...and now consume the new fields. */ -+ if (arg->major == 7 && arg->minor >= 6) { -+ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); - if (arg->major == 7 && arg->minor >= 6) { -@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, &outarg, outargsize); - } - --static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_session *se = req->se; - - (void)nodeid; -- (void)inarg; -+ (void)iter; - - se->got_destroy = 1; - if (se->op.destroy) { -@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); - const char *name; - } fuse_ll_ops[] = { - [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf = bufv->buf; - struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; -- const void *inarg; - struct fuse_req *req; - int err; - -@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } - } - -- inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, &iter, bufv); - } else { -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -- - return; - - reply_err: --- 
-1.8.3.1 - diff --git a/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch deleted file mode 100644 index b6de0a9..0000000 --- a/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:56 +0100 -Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-82-dgilbert@redhat.com> -Patchwork-id: 93533 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This cleans up unfreed resources in se on quiting, including -se->virtio_dev, se->vu_socket_path, se->vu_socketfd. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.h | 2 +- - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 65f91da..440508a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) - if (se->fd != -1) { - close(se->fd); - } -+ -+ if (se->vu_socket_path) { -+ virtio_session_close(se); -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ } -+ - free(se); - } - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7a8774a..e7bd772 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) - - return 0; - } -+ -+void virtio_session_close(struct fuse_session *se) -+{ -+ close(se->vu_socketfd); -+ free(se->virtio_dev); -+ se->virtio_dev = NULL; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index cc676b9..1116840 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -19,7 +19,7 @@ - struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); -- -+void virtio_session_close(struct fuse_session *se); - int virtio_loop(struct fuse_session *se); - - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch deleted file mode 100644 index d01b000..0000000 --- a/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +++ /dev/null @@ -1,99 +0,0 @@ -From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:13 +0100 -Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use - fuse log infra -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-99-dgilbert@redhat.com> -Patchwork-id: 93552 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_signals.c | 7 +++++-- - tools/virtiofsd/helper.c | 9 ++++++--- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index dc7c8ac..f18625b 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -12,6 +12,7 @@ - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -+#include - #include - #include - #include -@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - sa.sa_flags = 0; - - if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", -+ strerror(errno)); - return -1; - } - - if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && - sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", -+ strerror(errno)); - return -1; - } - return 0; -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 33749bf..f98d8f2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) - char completed; - - if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", -+ strerror(errno)); - return -1; - } - -@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) - */ - switch (fork()) { - case -1: -- perror("fuse_daemonize: fork"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", -+ strerror(errno)); - return -1; - case 0: - break; -@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) - } - - if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", -+ strerror(errno)); - return -1; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch deleted file mode 100644 index 8c1022a..0000000 --- a/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:12 +0100 -Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-98-dgilbert@redhat.com> -Patchwork-id: 93551 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Peng Tao - -Right now we always enable it regardless of given commandlines. -Fix it by setting the flag relying on the lo->flock bit. - -Signed-off-by: Peng Tao -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ab16135..ccbbec1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->flock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); -+ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; -+ } - } - - if (conn->capable & FUSE_CAP_POSIX_LOCKS) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-do_read-missing-NULL-check.patch b/kvm-virtiofsd-do_read-missing-NULL-check.patch deleted file mode 100644 index 4f8e5ef..0000000 --- a/kvm-virtiofsd-do_read-missing-NULL-check.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 901c005299b0316bbca7bc190de56f6c7a2a9880 
Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:11 +0000 -Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-5-dgilbert@redhat.com> -Patchwork-id: 94127 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing a NULL check if the argument fetch fails. - -Fixes: Coverity CID 1413119 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 01c418a..704c036 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, - struct fuse_file_info fi; - - arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch deleted file mode 100644 index 3279a5e..0000000 --- a/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:00 +0100 -Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-86-dgilbert@redhat.com> -Patchwork-id: 93539 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -lookup is a RO operations, PARALLEL_DIROPS can be enabled. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index aac282f..70568d2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - if (se->conn.want & FUSE_CAP_ASYNC_READ) { - outarg.flags |= FUSE_ASYNC_READ; - } -+ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { -+ outarg.flags |= FUSE_PARALLEL_DIROPS; -+ } - if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { - outarg.flags |= FUSE_POSIX_LOCKS; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch b/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch deleted file mode 100644 index 96f91a1..0000000 --- a/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:50 +0100 -Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-76-dgilbert@redhat.com> -Patchwork-id: 93527 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Inititialize the root inode in a single place. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -dgilbert: -with fix suggested by Misono Tomohiro -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- - 1 file changed, 25 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 33bfb4d..9e7191e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - } - -+static void setup_root(struct lo_data *lo, struct lo_inode *root) -+{ -+ int fd, res; -+ struct stat stat; -+ -+ fd = open("/", O_PATH); -+ if (fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); -+ exit(1); -+ } -+ -+ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); -+ exit(1); -+ } -+ -+ root->is_symlink = false; -+ root->fd = fd; -+ root->ino = stat.st_ino; -+ root->dev = stat.st_dev; -+ root->refcount = 2; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = 
FUSE_ARGS_INIT(argc, argv); -@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) - if (lo.debug) { - current_log_level = FUSE_LOG_DEBUG; - } -- lo.root.refcount = 2; -- - if (lo.source) { - struct stat stat; - int res; -@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) - } else { - lo.source = "/"; - } -- lo.root.is_symlink = false; - if (!lo.timeout_set) { - switch (lo.cache) { - case CACHE_NEVER: -@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) - exit(1); - } - -- lo.root.fd = open(lo.source, O_PATH); -- -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -- exit(1); -- } -- - se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); - if (se == NULL) { - goto err_out1; -@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) - - setup_sandbox(&lo, se, opts.syslog); - -+ setup_root(&lo, &lo.root); - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch deleted file mode 100644 index 4860bec..0000000 --- a/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +++ /dev/null @@ -1,85 +0,0 @@ -From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:49 +0100 -Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in - lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-75-dgilbert@redhat.com> -Patchwork-id: 93529 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -The Linux file handle APIs (struct export_operations) can access inodes -that are not attached to parents because path name traversal is not -performed. Refuse if there is no parent in lo_do_lookup(). - -Also clean up lo_do_lookup() while we're here. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index de12e75..33bfb4d 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct lo_inode *inode, *dir = lo_inode(req, parent); - -+ /* -+ * name_to_handle_at() and open_by_handle_at() can reach here with fuse -+ * mount point in guest, but we don't have its inode info in the -+ * ino_map. 
-+ */ -+ if (!dir) { -+ return ENOENT; -+ } -+ - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; -@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - name = "."; - } - -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; - } -@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode = lo_find(lo_data(req), &e->attr); -+ inode = lo_find(lo, &e->attr); - if (inode) { - close(newfd); - newfd = -1; -@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->is_symlink = S_ISLNK(e->attr.st_mode); - inode->refcount = 1; - inode->fd = newfd; -+ newfd = -1; - inode->ino = e->attr.st_ino; - inode->dev = e->attr.st_dev; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-error-handling-in-main.patch b/kvm-virtiofsd-fix-error-handling-in-main.patch deleted file mode 100644 index a831992..0000000 --- a/kvm-virtiofsd-fix-error-handling-in-main.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:55 +0100 -Subject: [PATCH 084/116] virtiofsd: fix error handling in main() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-81-dgilbert@redhat.com> -Patchwork-id: 93534 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place -to do cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. 
Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9ed77a1..af050c6 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) - lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { -- return 1; -+ goto err_out1; - } - fuse_set_log_func(log_func); - use_syslog = opts.syslog; - if (use_syslog) { - openlog("virtiofsd", LOG_PID, LOG_DAEMON); - } -+ - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -- return 1; -+ goto err_out1; - } - - /* --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch deleted file mode 100644 index 420a8a6..0000000 --- a/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:01 +0100 -Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in - lo_do_lookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-87-dgilbert@redhat.com> -Patchwork-id: 93543 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eric Ren - -Signed-off-by: Eric Ren -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e8dc5c7..05b5f89 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { - goto out_err; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/kvm-virtiofsd-fix-libfuse-information-leaks.patch deleted file mode 100644 index 90debb0..0000000 --- a/kvm-virtiofsd-fix-libfuse-information-leaks.patch +++ /dev/null @@ -1,322 +0,0 @@ -From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:35 +0100 -Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-61-dgilbert@redhat.com> -Patchwork-id: 93515 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Some FUSE message replies contain padding fields that are not -initialized by libfuse. This is fine in traditional FUSE applications -because the kernel is trusted. virtiofsd does not trust the guest and -must not expose uninitialized memory. - -Use C struct initializers to automatically zero out memory. Not all of -these code changes are strictly necessary but they will prevent future -information leaks if the structs are extended. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- - 1 file changed, 76 insertions(+), 74 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2d6dc5a..6ceb33d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = 
ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ *attr = (struct fuse_attr){ -+ .ino = stbuf->st_ino, -+ .mode = stbuf->st_mode, -+ .nlink = stbuf->st_nlink, -+ .uid = stbuf->st_uid, -+ .gid = stbuf->st_gid, -+ .rdev = stbuf->st_rdev, -+ .size = stbuf->st_size, -+ .blksize = stbuf->st_blksize, -+ .blocks = stbuf->st_blocks, -+ .atime = stbuf->st_atime, -+ .mtime = stbuf->st_mtime, -+ .ctime = stbuf->st_ctime, -+ .atimensec = ST_ATIM_NSEC(stbuf), -+ .mtimensec = ST_MTIM_NSEC(stbuf), -+ .ctimensec = ST_CTIM_NSEC(stbuf), -+ }; - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = req->unique, -+ .error = error, -+ }; - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); - error = -ERANGE; - } - -- out.unique = req->unique; -- out.error = error; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - static void convert_statfs(const struct statvfs *stbuf, - struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ *kstatfs = (struct fuse_kstatfs){ -+ .bsize = stbuf->f_bsize, -+ .frsize = stbuf->f_frsize, -+ .blocks = stbuf->f_blocks, -+ .bfree = stbuf->f_bfree, -+ .bavail = stbuf->f_bavail, -+ .files = stbuf->f_files, -+ .ffree = stbuf->f_ffree, -+ .namelen = stbuf->f_namemax, -+ }; - } - - static int 
send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) - static void fill_entry(struct fuse_entry_out *arg, - const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ *arg = (struct fuse_entry_out){ -+ .nodeid = e->ino, -+ .generation = e->generation, -+ .entry_valid = calc_timeout_sec(e->entry_timeout), -+ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), -+ .attr_valid = calc_timeout_sec(e->attr_timeout), -+ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), -+ }; - convert_stat(&e->attr, &arg->attr); - } - -@@ -362,10 +368,12 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - fill_entry(&dp->entry_out, e); - - struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ *dirent = (struct fuse_dirent){ -+ .ino = e->attr.st_ino, -+ .off = off, -+ .namelen = namelen, -+ .type = (e->attr.st_mode & S_IFMT) >> 12, -+ }; - memcpy(dirent->name, name, namelen); - memset(dirent->name + namelen, 0, entlen_padded - entlen); - -@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = req->unique, -+ }; - int res; - - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -- - res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); -@@ -2145,14 
+2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .error = notify_code, -+ }; - - if (!se->got_init) { - return -ENOTCONN; - } - -- out.unique = 0; -- out.error = notify_code; - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { - if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -+ struct fuse_notify_poll_wakeup_out outarg = { -+ .kh = ph->kh, -+ }; - struct iovec iov[2]; - -- outarg.kh = ph->kh; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2179,17 +2186,17 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -+ struct fuse_notify_inval_inode_out outarg = { -+ .ino = ino, -+ .off = off, -+ .len = len, -+ }; - struct iovec iov[2]; - - if (!se) { - return -EINVAL; - } - -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out outarg; -+ struct fuse_notify_inval_entry_out outarg = { -+ .parent = parent, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void 
*)name; -@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - fuse_ino_t child, const char *name, - size_t namelen) - { -- struct fuse_notify_delete_out outarg; -+ struct fuse_notify_delete_out outarg = { -+ .parent = parent, -+ .child = child, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; -@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -+ struct fuse_out_header out = { -+ .error = FUSE_NOTIFY_STORE, -+ }; -+ struct fuse_notify_store_out outarg = { -+ .nodeid = ino, -+ .offset = offset, -+ .size = fuse_buf_size(bufv), -+ }; - struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); - int res; - - if (!se) { - return -EINVAL; - } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -- -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(out); - iov[1].iov_base = &outarg; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch deleted file mode 100644 index 6243037..0000000 --- a/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:21 +0100 -Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-107-dgilbert@redhat.com> -Patchwork-id: 93560 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Now that lo_destroy() is serialized we can call unref_inode() so that -all inode resources are freed. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- - 1 file changed, 20 insertions(+), 21 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79b8b71..eb001b9 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - --static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) --{ -- struct lo_inode *inode = value; -- struct lo_data *lo = user_data; -- -- inode->nlookup = 0; -- lo_map_remove(&lo->ino_map, inode->fuse_ino); -- close(inode->fd); -- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ -- -- return TRUE; --} -- --static void unref_all_inodes(struct lo_data *lo) --{ -- pthread_mutex_lock(&lo->mutex); -- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -- pthread_mutex_unlock(&lo->mutex); --} -- - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; -- unref_all_inodes(lo); -+ -+ /* -+ * Normally lo->mutex must be taken when traversing lo->inodes but -+ * lo_destroy() is a serialized request so no races are possible here. -+ * -+ * In addition, we cannot acquire lo->mutex since unref_inode() takes it -+ * too and this would result in a recursive lock. -+ */ -+ while (true) { -+ GHashTableIter iter; -+ gpointer key, value; -+ -+ g_hash_table_iter_init(&iter, lo->inodes); -+ if (!g_hash_table_iter_next(&iter, &key, &value)) { -+ break; -+ } -+ -+ struct lo_inode *inode = value; -+ unref_inode_lolocked(lo, inode, inode->nlookup); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch deleted file mode 100644 index 4d7d6dc..0000000 --- a/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:57 +0100 -Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-83-dgilbert@redhat.com> -Patchwork-id: 93536 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -valgrind reported that lo.source is leaked on quiting, but it was defined -as (const char*) as it may point to a const string "/". 
- -Signed-off-by: Liu Bo -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index af050c6..056ebe8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -115,7 +115,7 @@ struct lo_data { - int writeback; - int flock; - int xattr; -- const char *source; -+ char *source; - double timeout; - int cache; - int timeout_set; -@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) - fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); - exit(1); - } -- - } else { -- lo.source = "/"; -+ lo.source = strdup("/"); - } - if (!lo.timeout_set) { - switch (lo.cache) { -@@ -2570,5 +2569,7 @@ err_out1: - close(lo.root.fd); - } - -+ free(lo.source); -+ - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch deleted file mode 100644 index b17d93c..0000000 --- a/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:09 +0000 -Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket - leak -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-3-dgilbert@redhat.com> -Patchwork-id: 94124 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. 
David Alan Gilbert" - -If we fail when bringing up the socket we can leak the listen_fd; -in practice the daemon will exit so it's not really a problem. - -Fixes: Coverity CID 1413121 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_virtio.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 80a6e92..dd1c605 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - old_umask = umask(0077); - if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ close(listen_sock); - umask(old_umask); - return -1; - } -@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - - if (listen(listen_sock, 1) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ close(listen_sock); - return -1; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-get-set-features-callbacks.patch b/kvm-virtiofsd-get-set-features-callbacks.patch deleted file mode 100644 index fcb5ca2..0000000 --- a/kvm-virtiofsd-get-set-features-callbacks.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:59 +0100 -Subject: [PATCH 028/116] virtiofsd: get/set features callbacks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-25-dgilbert@redhat.com> -Patchwork-id: 93478 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add the get/set features callbacks. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 1928a20..4819e56 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -46,6 +46,17 @@ struct virtio_fs_config { - uint32_t num_queues; - }; - -+/* Callback from libvhost-user */ -+static uint64_t fv_get_features(VuDev *dev) -+{ -+ return 1ULL << VIRTIO_F_VERSION_1; -+} -+ -+/* Callback from libvhost-user */ -+static void fv_set_features(VuDev *dev, uint64_t features) -+{ -+} -+ - /* - * Callback from libvhost-user if there's a new fd we're supposed to listen - * to, typically a queue kick? -@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) - } - - static const VuDevIface fv_iface = { -- /* TODO: Add other callbacks */ -+ .get_features = fv_get_features, -+ .set_features = fv_set_features, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch deleted file mode 100644 index 68d20e7..0000000 --- a/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +++ /dev/null @@ -1,589 +0,0 @@ -From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:11 +0100 -Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent - use-after-free -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-97-dgilbert@redhat.com> -Patchwork-id: 93550 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If thread A is using an inode it must not be deleted by thread B when -processing a FUSE_FORGET request. - -The FUSE protocol itself already has a counter called nlookup that is -used in FUSE_FORGET messages. We cannot trust this counter since the -untrusted client can manipulate it via FUSE_FORGET messages. - -Introduce a new refcount to keep inodes alive for the required lifespan. -lo_inode_put() must be called to release a reference. FUSE's nlookup -counter holds exactly one reference so that the inode stays alive as -long as the client still wants to remember it. - -Note that the lo_inode->is_symlink field is moved to avoid creating a -hole in the struct due to struct field alignment. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ - 1 file changed, 146 insertions(+), 23 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3a6d6b..ab16135 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -97,7 +97,13 @@ struct lo_key { - - struct lo_inode { - int fd; -- bool is_symlink; -+ -+ /* -+ * Atomic reference count for this object. The nlookup field holds a -+ * reference and release it when nlookup reaches 0. -+ */ -+ gint refcount; -+ - struct lo_key key; - - /* -@@ -116,6 +122,8 @@ struct lo_inode { - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -+ -+ bool is_symlink; - }; - - struct lo_cred { -@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - return elem - lo_data(req)->ino_map.elems; - } - -+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) -+{ -+ struct lo_inode *inode = *inodep; -+ -+ if (!inode) { -+ return; -+ } -+ -+ *inodep = NULL; -+ -+ if (g_atomic_int_dec_and_test(&inode->refcount)) { -+ close(inode->fd); -+ free(inode); -+ } -+} -+ -+/* Caller must release refcount using lo_inode_put() */ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - struct lo_data *lo = lo_data(req); -@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->ino_map, ino); -+ if (elem) { -+ g_atomic_int_inc(&elem->inode->refcount); -+ } - pthread_mutex_unlock(&lo->mutex); - - if (!elem) { -@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - return elem->inode; - } - -+/* -+ * TODO Remove this helper and force callers to hold an 
inode refcount until -+ * they are done with the fd. This will be done in a later patch to make -+ * review easier. -+ */ - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { - struct lo_inode *inode = lo_inode(req, ino); -- return inode ? inode->fd : -1; -+ int fd; -+ -+ if (!inode) { -+ return -1; -+ } -+ -+ fd = inode->fd; -+ lo_inode_put(lo_data(req), &inode); -+ return fd; - } - - static void lo_init(void *userdata, struct fuse_conn_info *conn) -@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - -+/* -+ * Increments parent->nlookup and caller must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - char path[PATH_MAX], struct lo_inode **parent) - { -@@ -582,6 +627,7 @@ retry: - p = &lo->root; - pthread_mutex_lock(&lo->mutex); - p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -625,6 +671,7 @@ retry: - - fail_unref: - unref_inode_lolocked(lo, p, 1); -+ lo_inode_put(lo, &p); - fail: - if (retries) { - retries--; -@@ -663,6 +710,7 @@ fallback: - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); - unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - goto out_err; - } - } -+ lo_inode_put(lo, &inode); - - return lo_getattr(req, ino, fi); - - out_err: - saverr = errno; -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - if (p) { - assert(p->nlookup > 0); - p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - } - pthread_mutex_unlock(&lo->mutex); - -@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) - free(plock); - } - -+/* -+ * Increments nlookup and caller 
must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode, *dir = lo_inode(req, parent); -+ struct lo_inode *inode = NULL; -+ struct lo_inode *dir = lo_inode(req, parent); - - /* - * name_to_handle_at() and open_by_handle_at() can reach here with fuse -@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ -+ /* -+ * One for the caller and one for nlookup (released in -+ * unref_inode_lolocked()) -+ */ -+ g_atomic_int_set(&inode->refcount, 2); -+ - inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; -@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, - name, (unsigned long long)e->ino); -@@ -894,6 +959,8 @@ out_err: - if (newfd != -1) { - close(newfd); - } -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - return saverr; - } - -@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; -@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &dir); - return; - - out: -+ lo_inode_put(lo, &dir); - fuse_reply_err(req, saverr); - } - -@@ -1085,6 +1155,7 @@ fallback: - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); - 
unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -+ struct lo_inode *parent_inode; - struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; -@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - return; - } - -+ parent_inode = lo_inode(req, parent); - inode = lo_inode(req, ino); -- if (!inode) { -- fuse_reply_err(req, EBADF); -- return; -+ if (!parent_inode || !inode) { -+ errno = EBADF; -+ goto out_err; - } - - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); - if (res == -1) { - goto out_err; - } -@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -+/* Increments nlookup and caller must release refcount using lo_inode_put() */ - static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, - const char *name) - { -@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? 
errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -- struct lo_inode *oldinode; -- struct lo_inode *newinode; -+ struct lo_inode *parent_inode; -+ struct lo_inode *newparent_inode; -+ struct lo_inode *oldinode = NULL; -+ struct lo_inode *newinode = NULL; - struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ newparent_inode = lo_inode(req, newparent); -+ if (!parent_inode || !newparent_inode) { -+ fuse_reply_err(req, EBADF); -+ goto out; -+ } -+ - oldinode = lookup_name(req, parent, name); - newinode = lookup_name(req, newparent, newname); - -@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); - #else -- res = syscall(SYS_renameat2, lo_fd(req, parent), name, -- lo_fd(req, newparent), newname, flags); -+ res = syscall(SYS_renameat2, parent_inode->fd, name, -+ newparent_inode->fd, newname, flags); - if (res == -1 && errno == ENOSYS) { - fuse_reply_err(req, EINVAL); - } else { -@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); -+ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - out: - unref_inode_lolocked(lo, oldinode, 1); - unref_inode_lolocked(lo, newinode, 1); -+ lo_inode_put(lo, &oldinode); -+ lo_inode_put(lo, &newinode); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &newparent_inode); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -+ -+ /* Drop our refcount from lo_do_lookup() */ -+ lo_inode_put(lo, &inode); - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); -+ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ - - return TRUE; - } -@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -+ lo_inode_put(lo, &inode); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - err = 0; - error: - lo_dirp_put(&d); -+ lo_inode_put(lo, &dinode); - - /* - * If there's an error, we can only signal it if we haven't stored -@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int fd; - struct lo_data *lo = lo_data(req); -+ struct lo_inode 
*parent_inode; - struct fuse_entry_param e; - int err; - struct lo_cred old = {}; -@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ if (!parent_inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; - } - -- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; - lo_restore_cred(&old); -@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); -- fuse_reply_err(req, ENOMEM); -- return; -+ err = ENOMEM; -+ goto out; - } - - fi->fh = fh; -@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - out: -+ lo_inode_put(lo, &parent_inode); -+ - if (err) { - fuse_reply_err(req, err); - } else { -@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - plock = - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - ret = fcntl(plock->fd, F_OFD_GETLK, lock); - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); - - if (saverr) { - fuse_reply_err(req, saverr); -@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - /* TODO: Is it alright to modify flock? 
*/ -@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); -+ - fuse_reply_err(req, saverr); - } - -@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - pthread_mutex_unlock(&inode->plock_mutex); - - res = close(dup(lo_fi_fd(req, fi))); -+ lo_inode_put(lo_data(req), &inode); - fuse_reply_err(req, res == -1 ? errno : 0); - } - -@@ -2115,11 +2227,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2190,11 +2305,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2243,6 +2361,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2289,6 +2409,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; - root->nlookup = 2; -+ g_atomic_int_set(&root->refcount, 2); - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-load_capng-missing-unlock.patch b/kvm-virtiofsd-load_capng-missing-unlock.patch deleted file mode 100644 index bc04f6b..0000000 --- a/kvm-virtiofsd-load_capng-missing-unlock.patch +++ /dev/null @@ -1,46 +0,0 @@ -From ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:10 +0000 -Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-4-dgilbert@redhat.com> -Patchwork-id: 94126 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing unlock in error path. - -Fixes: Covertiy CID 1413123 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e6f2399..c635fc8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -232,6 +232,7 @@ static int load_capng(void) - */ - cap.saved = capng_save_state(); - if (!cap.saved) { -+ pthread_mutex_unlock(&cap.mutex); - fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-make-f-foreground-the-default.patch b/kvm-virtiofsd-make-f-foreground-the-default.patch deleted file mode 100644 index d6cb0e3..0000000 --- a/kvm-virtiofsd-make-f-foreground-the-default.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:08 +0100 -Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-34-dgilbert@redhat.com> -Patchwork-id: 93489 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -According to vhost-user.rst "Backend program conventions", backend -programs should run in the foregound by default. Follow the -conventions so libvirt and other management tools can control virtiofsd -in a standard way. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 676032e..a3645fc 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -29,6 +29,11 @@ - { \ - t, offsetof(struct fuse_cmdline_opts, p), 1 \ - } -+#define FUSE_HELPER_OPT_VALUE(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), v \ -+ } -+ - - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), -@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("subtype=", nodefault_subtype), -@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -+ " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n"); -@@ 
-158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - - opts->max_idle_threads = 10; -+ opts->foreground = 1; - - if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == - -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-make-lo_release-atomic.patch b/kvm-virtiofsd-make-lo_release-atomic.patch deleted file mode 100644 index 6d88549..0000000 --- a/kvm-virtiofsd-make-lo_release-atomic.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:06 +0100 -Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-92-dgilbert@redhat.com> -Patchwork-id: 93545 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Hold the lock across both lo_map_get() and lo_map_remove() to prevent -races between two FUSE_RELEASE requests. In this case I don't see a -serious bug but it's safer to do things atomically. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9414935..690edbc 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -- int fd; -+ struct lo_map_elem *elem; -+ int fd = -1; - - (void)ino; - -- fd = lo_fi_fd(req, fi); -- - pthread_mutex_lock(&lo->mutex); -- lo_map_remove(&lo->fd_map, fi->fh); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ if (elem) { -+ fd = elem->fd; -+ elem = NULL; -+ lo_map_remove(&lo->fd_map, fi->fh); -+ } - pthread_mutex_unlock(&lo->mutex); - - close(fd); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/kvm-virtiofsd-move-to-a-new-pid-namespace.patch deleted file mode 100644 index 9a33d1b..0000000 --- a/kvm-virtiofsd-move-to-a-new-pid-namespace.patch +++ /dev/null @@ -1,223 +0,0 @@ -From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:29 +0100 -Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-55-dgilbert@redhat.com> -Patchwork-id: 93510 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd needs access to /proc/self/fd. 
Let's move to a new pid -namespace so that a compromised process cannot see another other -processes running on the system. - -One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* -processes and not the current process. Therefore we need to fork the -pid 1 process that will actually run virtiofsd and leave a parent in -waitpid(2). This is not the same thing as daemonization and parent -processes should not notice a difference. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- - 1 file changed, 86 insertions(+), 48 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 27ab328..0947d14 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -51,7 +51,10 @@ - #include - #include - #include -+#include - #include -+#include -+#include - #include - #include - -@@ -1945,24 +1948,95 @@ static void print_capabilities(void) - } - - /* -- * Called after our UNIX domain sockets have been created, now we can move to -- * an empty network namespace to prevent TCP/IP and other network activity in -- * case this process is compromised. -+ * Move to a new mount, net, and pid namespaces to isolate this process. - */ --static void setup_net_namespace(void) -+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - { -- if (unshare(CLONE_NEWNET) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ pid_t child; -+ -+ /* -+ * Create a new pid namespace for *child* processes. We'll have to -+ * fork in order to enter the new pid namespace. A new mount namespace -+ * is also needed so that we can remount /proc for the new pid -+ * namespace. -+ * -+ * Our UNIX domain sockets have been created. 
Now we can move to -+ * an empty network namespace to prevent TCP/IP and other network -+ * activity in case this process is compromised. -+ */ -+ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ child = fork(); -+ if (child < 0) { -+ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); -+ exit(1); -+ } -+ if (child > 0) { -+ pid_t waited; -+ int wstatus; -+ -+ /* The parent waits for the child */ -+ do { -+ waited = waitpid(child, &wstatus, 0); -+ } while (waited < 0 && errno == EINTR && !se->exited); -+ -+ /* We were terminated by a signal, see fuse_signals.c */ -+ if (se->exited) { -+ exit(0); -+ } -+ -+ if (WIFEXITED(wstatus)) { -+ exit(WEXITSTATUS(wstatus)); -+ } -+ -+ exit(1); -+ } -+ -+ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ -+ prctl(PR_SET_PDEATHSIG, SIGTERM); -+ -+ /* -+ * If the mounts have shared propagation then we want to opt out so our -+ * mount changes don't affect the parent mount namespace. -+ */ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); -+ exit(1); -+ } -+ -+ /* The child must remount /proc to use the new pid namespace */ -+ if (mount("proc", "/proc", "proc", -+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); -+ exit(1); -+ } -+ -+ /* Now we can get our /proc/self/fd directory file descriptor */ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); - exit(1); - } - } - --/* This magic is based on lxc's lxc_pivot_root() */ --static void setup_pivot_root(const char *source) -+/* -+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
-+ */ -+static void setup_mounts(const char *source) - { - int oldroot; - int newroot; - -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ /* This magic is based on lxc's lxc_pivot_root() */ - oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (oldroot < 0) { - fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) - close(oldroot); - } - --static void setup_proc_self_fd(struct lo_data *lo) --{ -- lo->proc_self_fd = open("/proc/self/fd", O_PATH); -- if (lo->proc_self_fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -- exit(1); -- } --} -- --/* -- * Make the source directory our root so symlinks cannot escape and no other -- * files are accessible. -- */ --static void setup_mount_namespace(const char *source) --{ -- if (unshare(CLONE_NEWNS) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -- exit(1); -- } -- -- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -- exit(1); -- } -- -- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -- exit(1); -- } -- -- setup_pivot_root(source); --} -- - /* - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - { -- setup_net_namespace(); -- setup_mount_namespace(lo->source); -+ setup_namespaces(lo, se); -+ setup_mounts(lo->source); - } - - int main(int argc, char *argv[]) -@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -- /* Must be after daemonize to get the right /proc/self/fd */ -- setup_proc_self_fd(&lo); -- -- setup_sandbox(&lo); -+ setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/kvm-virtiofsd-move-to-an-empty-network-namespace.patch deleted file mode 100644 index 69a7c20..0000000 --- a/kvm-virtiofsd-move-to-an-empty-network-namespace.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:28 +0100 -Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-54-dgilbert@redhat.com> -Patchwork-id: 93508 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If the process is compromised there should be no network access. Use an -empty network namespace to sandbox networking. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0570453..27ab328 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1944,6 +1944,19 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* -+ * Called after our UNIX domain sockets have been created, now we can move to -+ * an empty network namespace to prevent TCP/IP and other network activity in -+ * case this process is compromised. -+ */ -+static void setup_net_namespace(void) -+{ -+ if (unshare(CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ exit(1); -+ } -+} -+ - /* This magic is based on lxc's lxc_pivot_root() */ - static void setup_pivot_root(const char *source) - { -@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) - */ - static void setup_sandbox(struct lo_data *lo) - { -+ setup_net_namespace(); - setup_mount_namespace(lo->source); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch deleted file mode 100644 index e3d5773..0000000 --- a/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:24 +0100 -Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to - fuse_reply_err() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-110-dgilbert@redhat.com> -Patchwork-id: 93559 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() -changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches -the wrong errno.(i.e. reports "fuse: bad error value: ..."). - -Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno -directly in lo_copy_file_range(). - -Signed-off-by: Xiao Yang -Reviewed-by: Eryu Guan - -dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc15d61..e6f2399 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - - res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { -- fuse_reply_err(req, -errno); -+ fuse_reply_err(req, errno); - } else { - fuse_reply_write(req, res); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch deleted file mode 100644 index ddacdbe..0000000 --- a/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:16 +0100 -Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for - directory open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-102-dgilbert@redhat.com> -Patchwork-id: 93555 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as -described in fuse_common.h, use cache_readdir(FOPNE_CACHE_DIR) for -diretory open when cache=always mode. - -Signed-off-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 4c61ac5..79b8b71 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - - fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ fi->cache_readdir = 1; - } - fuse_reply_open(req, fi); - return; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch deleted file mode 100644 index 0506574..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +++ /dev/null @@ -1,238 +0,0 @@ -From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:15 +0100 -Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide - lo_dirp pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-41-dgilbert@redhat.com> -Patchwork-id: 93495 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_dirp pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- - 1 file changed, 76 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index a3ebf74..5f5a72f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -56,27 +56,10 @@ - - #include "passthrough_helpers.h" - --/* -- * We are re-using pointers to our `struct lo_inode` -- * elements as inodes. This means that we must be able to -- * store uintptr_t values in a fuse_ino_t variable. The following -- * incantation checks this condition at compile time. -- */ --#if defined(__GNUC__) && \ -- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -- !defined __cplusplus --_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); --#else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -- unsigned _uintptr_to_must_hold_fuse_ino_t -- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); --}; --#endif -- - struct lo_map_elem { - union { - struct lo_inode *inode; -+ struct lo_dirp *dirp; - ssize_t freelist; - }; - bool in_use; -@@ -123,6 +106,7 @@ struct lo_data { - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ -+ struct lo_map dirp_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->dirp = dirp; -+ return elem - lo_data(req)->dirp_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - { - struct lo_map_elem *elem; -@@ -861,9 +859,19 @@ struct lo_dirp { - off_t offset; - }; - --static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { -- return (struct lo_dirp *)(uintptr_t)fi->fh; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ if (!elem) { -+ return NULL; -+ } -+ -+ return elem->dirp; - } - - static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - struct lo_data *lo = lo_data(req); - struct lo_dirp *d; - int fd; -+ ssize_t fh; - - d = calloc(1, sizeof(struct lo_dirp)); - if (d == NULL) { -@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -- fi->fh = (uintptr_t)d; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_dirp_mapping(req, d); -+ pthread_mutex_unlock(&lo->mutex); -+ 
if (fh == -1) { -+ goto out_err; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } -@@ -903,6 +919,9 @@ out_errno: - error = errno; - out_err: - if (d) { -+ if (d->dp) { -+ closedir(d->dp); -+ } - if (fd != -1) { - close(fd); - } -@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -+ struct lo_dirp *d; -+ char *buf = NULL; - char *p; - size_t rem = size; -- int err; -+ int err = ENOMEM; - - (void)ino; - -+ d = lo_dirp(req, fi); -+ if (!d) { -+ goto error; -+ } -+ - buf = calloc(1, size); - if (!buf) { -- err = ENOMEM; - goto error; - } - p = buf; -@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, - static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ - closedir(d->dp); - free(d); - fuse_reply_err(req, 0); -@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- int fd = dirfd(lo_dirp(fi)->dp); -+ struct lo_dirp *d; -+ int fd; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ fd = dirfd(d->dp); - if (datasync) { - res = fdatasync(fd); - } else { -@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) - root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); - root_elem->inode = &lo.root; - -+ lo_map_init(&lo.dirp_map); -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1710,6 
+1758,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch deleted file mode 100644 index b8de3d8..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +++ /dev/null @@ -1,303 +0,0 @@ -From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:17 +0100 -Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-43-dgilbert@redhat.com> -Patchwork-id: 93496 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -We have two operations that cannot be done race-free on a symlink in -certain cases: utimes and link. - -Add racy fallback for these if the race-free method doesn't work. We do -our best to avoid races even in this case: - - - get absolute path by reading /proc/self/fd/NN symlink - - - lookup parent directory: after this we are safe against renames in - ancestors - - - lookup name in parent directory, and verify that we got to the original - inode, if not retry the whole thing - -Both utimes(2) and link(2) hold i_lock on the inode across the operation, -so a racing rename/delete by this fuse instance is not possible, only from -other entities changing the filesystem. - -If the "norace" option is given, then disable the racy fallbacks. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 +- - tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- - 2 files changed, 145 insertions(+), 17 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index b8ec5ac..5531425 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" -- " allowed (default: 10)\n"); -+ " allowed (default: 10)\n" -+ " -o norace disable racy fallback\n" -+ " default: false\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9815bfa..ac380ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -98,6 +98,7 @@ enum { - struct lo_data { - pthread_mutex_t mutex; - int debug; -+ int norace; - int writeback; - int flock; - int xattr; -@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { - { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -+ { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; - -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); -+ -+ - static struct lo_data *lo_data(fuse_req_t req) - { - return (struct lo_data *)fuse_req_userdata(req); -@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - --static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec 
*tv) -+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, -+ char path[PATH_MAX], struct lo_inode **parent) - { -- int res; - char procname[64]; -+ char *last; -+ struct stat stat; -+ struct lo_inode *p; -+ int retries = 2; -+ int res; -+ -+retry: -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ res = readlink(procname, path, PATH_MAX); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); -+ goto fail_noretry; -+ } -+ -+ if (res >= PATH_MAX) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); -+ goto fail_noretry; -+ } -+ path[res] = '\0'; -+ -+ last = strrchr(path, '/'); -+ if (last == NULL) { -+ /* Shouldn't happen */ -+ fuse_log( -+ FUSE_LOG_WARNING, -+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); -+ goto fail_noretry; -+ } -+ if (last == path) { -+ p = &lo->root; -+ pthread_mutex_lock(&lo->mutex); -+ p->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ } else { -+ *last = '\0'; -+ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat parent: %m\n", __func__); -+ } -+ goto fail; -+ } -+ p = lo_find(lo, &stat); -+ if (p == NULL) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to find parent\n", __func__); -+ } -+ goto fail; -+ } -+ } -+ last++; -+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to match last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ *parent = p; -+ memmove(path, last, strlen(last) + 1); -+ -+ return 0; -+ -+fail_unref: -+ unref_inode(lo, p, 1); -+fail: -+ if (retries) { -+ retries--; -+ goto retry; -+ } -+fail_noretry: -+ errno = EIO; -+ return -1; -+} -+ -+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); - -- return utimensat(AT_FDCWD, procname, tv, 0); -+ return utimensat(AT_FDCWD, path, tv, 0); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -+ unref_inode(lo, parent, 1); -+ } -+ -+ return res; - } - - static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - int ifd; - int res; -@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = futimens(fd, tv); - } else { -- res = utimensat_empty_nofollow(inode, tv); -+ res = utimensat_empty(lo, inode, tv); - } - if (res == -1) { - goto out_err; -@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, - lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - --static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, -+ int dfd, const char *name) - { - int res; -- char procname[64]; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = linkat(parent->fd, path, dfd, name, 0); -+ unref_inode(lo, parent, 1); -+ } - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return res; - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); - if (res == -1) { - goto out_err; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch deleted file mode 100644 index 24b2a6e..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:16 +0100 -Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file - descriptors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-42-dgilbert@redhat.com> -Patchwork-id: 93494 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose file descriptor numbers to clients. This prevents the -abuse of internal file descriptors (like stdin/stdout). - -Signed-off-by: Stefan Hajnoczi -Fix from: -Signed-off-by: Xiao Yang -dgilbert: - Added lseek -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- - 1 file changed, 94 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5f5a72f..9815bfa 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -60,6 +60,7 @@ struct lo_map_elem { - union { - struct lo_inode *inode; - struct lo_dirp *dirp; -+ int fd; - ssize_t freelist; - }; - bool in_use; -@@ -107,6 +108,7 @@ struct lo_data { - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -+ struct lo_map fd_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->fd = fd; -+ return elem - lo_data(req)->fd_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) - { - struct lo_map_elem *elem; -@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, - return utimensat(AT_FDCWD, procname, tv, 0); - } - -+static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return -1; -+ } -+ -+ return elem->fd; -+} -+ - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info *fi) - { -@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - struct lo_inode *inode; - int ifd; - int res; -+ int fd; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - - ifd = inode->fd; - -+ /* If fi->fh is invalid we'll report EBADF later */ -+ if (fi) { -+ fd = lo_fi_fd(req, fi); -+ } -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -+ res = fchmod(fd, attr->st_mode); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = chmod(procname, attr->st_mode); -@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - if (valid & FUSE_SET_ATTR_SIZE) { - if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -+ res = ftruncate(fd, attr->st_size); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = truncate(procname, attr->st_size); -@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - - if (fi) { -- res = futimens(fi->fh, tv); -+ res = futimens(fd, tv); - } else { - res = utimensat_empty_nofollow(inode, tv); - } -@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - lo_restore_cred(&old); - - 
if (!err) { -- fi->fh = fd; -+ ssize_t fh; -+ -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } - if (lo->cache == CACHE_NEVER) { -@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int fd; -+ ssize_t fh; - char buf[64]; - struct lo_data *lo = lo_data(req); - -@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - return (void)fuse_reply_err(req, errno); - } - -- fi->fh = fd; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { -@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -+ struct lo_data *lo = lo_data(req); -+ int fd; -+ - (void)ino; - -- close(fi->fh); -+ fd = lo_fi_fd(req, fi); -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ close(fd); - fuse_reply_err(req, 0); - } - -@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -- res = close(dup(fi->fh)); -+ res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - -@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - return (void)fuse_reply_err(req, errno); - } - } else { -- fd = fi->fh; -+ fd = lo_fi_fd(req, fi); - } - - if (datasync) { -@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - } - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -+ buf.buf[0].fd = lo_fi_fd(req, fi); - buf.buf[0].pos = offset; - - fuse_reply_data(req, &buf); -@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - - if (lo_debug(req)) { -@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - (void)ino; - - #ifdef CONFIG_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -+ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); - if (err < 0) { - err = errno; - } -@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - return; - } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); - #endif - - fuse_reply_err(req, err); -@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - int res; - (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(lo_fi_fd(req, fi), op); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - } -@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - off_t off_out, struct fuse_file_info *fi_out, - size_t len, int flags) - { -+ int in_fd, out_fd; - ssize_t res; - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, -- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -- flags); -+ in_fd = lo_fi_fd(req, fi_in); -+ out_fd = lo_fi_fd(req, fi_out); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); - -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { - fuse_reply_err(req, -errno); - } else { -@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - off_t res; - - (void)ino; -- res = lseek(fi->fh, off, whence); -+ res = lseek(lo_fi_fd(req, fi), off, whence); - if (res != -1) { - fuse_reply_lseek(req, res); - } else { -@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) - root_elem->inode = &lo.root; - - lo_map_init(&lo.dirp_map); -+ lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; -@@ -1758,6 +1829,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch deleted file mode 100644 index ba8b730..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +++ /dev/null @@ -1,395 +0,0 @@ -From d81396cc3d9815730903b0755c9d2e67d6954d54 Mon 
Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:14 +0100 -Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide - lo_inode pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-40-dgilbert@redhat.com> -Patchwork-id: 93493 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_inode pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- - 1 file changed, 114 insertions(+), 30 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e83a976..a3ebf74 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -57,8 +57,8 @@ - #include "passthrough_helpers.h" - - /* -- * We are re-using pointers to our `struct lo_inode` and `struct -- * lo_dirp` elements as inodes. This means that we must be able to -+ * We are re-using pointers to our `struct lo_inode` -+ * elements as inodes. This means that we must be able to - * store uintptr_t values in a fuse_ino_t variable. The following - * incantation checks this condition at compile time. - */ -@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - - struct lo_map_elem { - union { -- /* Element values will go here... 
*/ -+ struct lo_inode *inode; - ssize_t freelist; - }; - bool in_use; -@@ -97,6 +97,7 @@ struct lo_inode { - ino_t ino; - dev_t dev; - uint64_t refcount; /* protected by lo->mutex */ -+ fuse_ino_t fuse_ino; - }; - - struct lo_cred { -@@ -121,6 +122,7 @@ struct lo_data { - int cache; - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_map ino_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - --__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; - map->nelems = 0; - map->freelist = -1; - } - --__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+static void lo_map_destroy(struct lo_map *map) - { - free(map->elems); - } -@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) - return 1; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_alloc_elem(struct lo_map *map) -+static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) - { - struct lo_map_elem *elem; - -@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) - return elem; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_reserve(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) - { - ssize_t *prev; - -@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) - return NULL; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_get(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) - { - if (key >= map->nelems) { - return NULL; -@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) - return &map->elems[key]; - } - --__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -- size_t key) 
-+static void lo_map_remove(struct lo_map *map, size_t key) - { - struct lo_map_elem *elem; - -@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, - map->freelist = key; - } - -+/* Assumes lo->mutex is held */ -+static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->inode = inode; -+ return elem - lo_data(req)->ino_map.elems; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) { -- return &lo_data(req)->root; -- } else { -- return (struct lo_inode *)(uintptr_t)ino; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->ino_map, ino); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return NULL; - } -+ -+ return elem->inode; - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ struct lo_inode *inode = lo_inode(req, ino); -+ return inode ? 
inode->fd : -1; - } - - static bool lo_debug(fuse_req_t req) -@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -+ struct lo_inode *inode; -+ int ifd; - int res; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ ifd = inode->fd; -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { - res = fchmod(fi->fh, attr->st_mode); -@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); -+ inode->fuse_ino = lo_add_inode_mapping(req, inode); - prev = &lo->root; - next = prev->next; - next->prev = inode; -@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - prev->next = inode; - pthread_mutex_unlock(&lo->mutex); - } -- e->ino = (uintptr_t)inode; -+ e->ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -+ struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ dir = lo_inode(req, parent); -+ if (!dir) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOMEM; - - saverr = lo_change_cred(req, &old); -@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; -@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_lock(&lo->mutex); - inode->refcount++; - pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t)inode; -+ e.ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - next->prev = prev; - prev->next = next; - -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -- - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ return; -+ } - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char 
*value, size_t size, int flags) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1400,10 +1464,16 @@ out: - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) - struct fuse_session *se; - struct fuse_cmdline_opts opts; - struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_map_elem *root_elem; - int ret = -1; - - /* Don't mask creation mode, kernel already did that */ -@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) - pthread_mutex_init(&lo.mutex, NULL); - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; -+ lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_NORMAL; - -+ /* -+ * Set up the ino map like this: -+ * [0] Reserved (will not be used) -+ * [1] Root inode -+ */ -+ lo_map_init(&lo.ino_map); -+ lo_map_reserve(&lo.ino_map, 0)->in_use = false; -+ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); -+ root_elem->inode = &lo.root; -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1628,6 +1710,8 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.ino_map); -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch deleted file mode 100644 index 4751f95..0000000 --- 
a/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +++ /dev/null @@ -1,182 +0,0 @@ -From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:13 +0100 -Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh - indirection -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-39-dgilbert@redhat.com> -Patchwork-id: 93492 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -A layer of indirection is needed because passthrough_ll cannot expose -pointers or file descriptor numbers to untrusted clients. Malicious -clients could send invalid pointers or file descriptors in order to -crash or exploit the file system daemon. - -lo_map provides an integer key->value mapping. This will be used for -ino and fh fields in the patches that follow. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 124 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5e06179..e83a976 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - }; - #endif - -+struct lo_map_elem { -+ union { -+ /* Element values will go here... 
*/ -+ ssize_t freelist; -+ }; -+ bool in_use; -+}; -+ -+/* Maps FUSE fh or ino values to internal objects */ -+struct lo_map { -+ struct lo_map_elem *elems; -+ size_t nelems; -+ ssize_t freelist; -+}; -+ - struct lo_inode { - struct lo_inode *next; /* protected by lo->mutex */ - struct lo_inode *prev; /* protected by lo->mutex */ -@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+{ -+ map->elems = NULL; -+ map->nelems = 0; -+ map->freelist = -1; -+} -+ -+__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+{ -+ free(map->elems); -+} -+ -+static int lo_map_grow(struct lo_map *map, size_t new_nelems) -+{ -+ struct lo_map_elem *new_elems; -+ size_t i; -+ -+ if (new_nelems <= map->nelems) { -+ return 1; -+ } -+ -+ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); -+ if (!new_elems) { -+ return 0; -+ } -+ -+ for (i = map->nelems; i < new_nelems; i++) { -+ new_elems[i].freelist = i + 1; -+ new_elems[i].in_use = false; -+ } -+ new_elems[new_nelems - 1].freelist = -1; -+ -+ map->elems = new_elems; -+ map->freelist = map->nelems; -+ map->nelems = new_nelems; -+ return 1; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_alloc_elem(struct lo_map *map) -+{ -+ struct lo_map_elem *elem; -+ -+ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { -+ return NULL; -+ } -+ -+ elem = &map->elems[map->freelist]; -+ map->freelist = elem->freelist; -+ -+ elem->in_use = true; -+ -+ return elem; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_reserve(struct lo_map *map, size_t key) -+{ -+ ssize_t *prev; -+ -+ if (!lo_map_grow(map, key + 1)) { -+ return NULL; -+ } -+ -+ for (prev = &map->freelist; *prev != -1; -+ prev = &map->elems[*prev].freelist) { -+ if (*prev == key) { -+ struct lo_map_elem *elem = &map->elems[key]; -+ -+ *prev = elem->freelist; 
-+ elem->in_use = true; -+ return elem; -+ } -+ } -+ return NULL; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_get(struct lo_map *map, size_t key) -+{ -+ if (key >= map->nelems) { -+ return NULL; -+ } -+ if (!map->elems[key].in_use) { -+ return NULL; -+ } -+ return &map->elems[key]; -+} -+ -+__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -+ size_t key) -+{ -+ struct lo_map_elem *elem; -+ -+ if (key >= map->nelems) { -+ return; -+ } -+ -+ elem = &map->elems[key]; -+ if (!elem->in_use) { -+ return; -+ } -+ -+ elem->in_use = false; -+ -+ elem->freelist = map->freelist; -+ map->freelist = key; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - if (ino == FUSE_ROOT_ID) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch deleted file mode 100644 index a3f7970..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:45 +0100 -Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-71-dgilbert@redhat.com> -Patchwork-id: 93531 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 98114a3..18d69ab 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - if (flags) { -+#ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -+#else -+ res = syscall(SYS_renameat2, lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname, flags); -+ if (res == -1 && errno == ENOSYS) { -+ fuse_reply_err(req, EINVAL); -+ } else { -+ fuse_reply_err(req, res == -1 ? errno : 0); -+ } -+#endif - return; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch deleted file mode 100644 index dc87ef2..0000000 --- a/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:51 +0100 -Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-77-dgilbert@redhat.com> -Patchwork-id: 93530 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - - - Rename "cache=never" to "cache=none" to match 9p's similar option. 
- - - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" - option. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 ++++- - tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- - 2 files changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 14f5d70..5672024 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o cache= cache mode. could be one of \"auto, " -+ "always, none\"\n" -+ " default: auto\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -- "cache=never\n" -+ "cache=none\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9e7191e..b40f287 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -101,8 +101,8 @@ struct lo_cred { - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -+ CACHE_NONE, -+ CACHE_AUTO, - CACHE_ALWAYS, - }; - -@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, - { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, - { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; -@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - } - - fi->fh = fh; -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; -- lo.cache = CACHE_NORMAL; -+ lo.cache = CACHE_AUTO; - - /* - * Set up the ino map like this: -@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) - } - if (!lo.timeout_set) { - switch (lo.cache) { -- case CACHE_NEVER: -+ case CACHE_NONE: - lo.timeout = 0.0; - break; - -- case CACHE_NORMAL: -+ case CACHE_AUTO: - lo.timeout = 1.0; - break; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch deleted file mode 100644 index 
c55eead..0000000 --- a/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +++ /dev/null @@ -1,154 +0,0 @@ -From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:13 +0000 -Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-7-dgilbert@redhat.com> -Patchwork-id: 94125 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -This is a cleanup patch to simplify the following xattr fix and -there is no functional changes. - -- Move memory allocation to head of the function -- Unify fgetxattr/flistxattr call for both size == 0 and - size != 0 case -- Remove redundant lo_inode_put call in error path - (Note: second call is ignored now since @inode is already NULL) - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ - 1 file changed, 22 insertions(+), 32 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c635fc8..50c7273 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = fgetxattr(fd, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = fgetxattr(fd, name, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = fgetxattr(fd, name, NULL, 0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2242,7 +2238,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = flistxattr(fd, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = flistxattr(fd, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = flistxattr(fd, NULL, 
0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2320,7 +2311,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch deleted file mode 100644 index 98d00fc..0000000 --- a/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:47 +0100 -Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-73-dgilbert@redhat.com> -Patchwork-id: 93524 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 4 ++++ - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 6d50a46..14f5d70 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o readdirplus|no_readdirplus\n" -+ " enable/disable readirplus\n" -+ " default: readdirplus except with " -+ "cache=never\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6480c51..8b1784f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -117,6 +117,8 @@ struct lo_data { - double timeout; - int cache; - int timeout_set; -+ int readdirplus_set; -+ int readdirplus_clear; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, -+ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -+ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, - FUSE_OPT_END - }; - static bool use_syslog = false; -@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if (lo->cache == CACHE_NEVER) { -+ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, 
"lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch deleted file mode 100644 index 4b02779..0000000 --- a/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +++ /dev/null @@ -1,198 +0,0 @@ -From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:12 +0100 -Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in - caller's context -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-38-dgilbert@redhat.com> -Patchwork-id: 93488 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -We need to create files in the caller's context. Otherwise after -creating a file, the caller might not be able to do file operations on -that file. - -Changed effective uid/gid to caller's uid/gid, create file and then -switch back to uid/gid 0. - -Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID -in all threads, which is not what we want. - -Signed-off-by: Vivek Goyal -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- - 1 file changed, 91 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index cd27c09..5e06179 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -83,6 +84,11 @@ struct lo_inode { - uint64_t refcount; /* protected by lo->mutex */ - }; - -+struct lo_cred { -+ uid_t euid; -+ gid_t egid; -+}; -+ - enum { - CACHE_NEVER, - CACHE_NORMAL, -@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - } - } - -+/* -+ * On some archs, setres*id is limited to 2^16 but they -+ * provide setres*id32 variants that allow 2^32. -+ * Others just let setres*id do 2^32 anyway. -+ */ -+#ifdef SYS_setresgid32 -+#define OURSYS_setresgid SYS_setresgid32 -+#else -+#define OURSYS_setresgid SYS_setresgid -+#endif -+ -+#ifdef SYS_setresuid32 -+#define OURSYS_setresuid SYS_setresuid32 -+#else -+#define OURSYS_setresuid SYS_setresuid -+#endif -+ -+/* -+ * Change to uid/gid of caller so that file is created with -+ * ownership of caller. -+ * TODO: What about selinux context? 
-+ */ -+static int lo_change_cred(fuse_req_t req, struct lo_cred *old) -+{ -+ int res; -+ -+ old->euid = geteuid(); -+ old->egid = getegid(); -+ -+ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); -+ if (res == -1) { -+ return errno; -+ } -+ -+ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); -+ if (res == -1) { -+ int errno_save = errno; -+ -+ syscall(OURSYS_setresgid, -1, old->egid, -1); -+ return errno_save; -+ } -+ -+ return 0; -+} -+ -+/* Regain Privileges */ -+static void lo_restore_cred(struct lo_cred *old) -+{ -+ int res; -+ -+ res = syscall(OURSYS_setresuid, -1, old->euid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); -+ exit(1); -+ } -+ -+ res = syscall(OURSYS_setresgid, -1, old->egid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); -+ exit(1); -+ } -+} -+ - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - const char *name, mode_t mode, dev_t rdev, - const char *link) -@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - int saverr; - struct lo_inode *dir = lo_inode(req, parent); - struct fuse_entry_param e; -+ struct lo_cred old = {}; - - saverr = ENOMEM; - -+ saverr = lo_change_cred(req, &old); -+ if (saverr) { -+ goto out; -+ } -+ - res = mknod_wrapper(dir->fd, name, link, mode, rdev); - - saverr = errno; -+ -+ lo_restore_cred(&old); -+ - if (res == -1) { - goto out; - } -@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct fuse_entry_param e; - int err; -+ struct lo_cred old = {}; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", - parent, name); - } - -+ err = lo_change_cred(req, &old); -+ if (err) { -+ goto out; -+ } -+ - fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -- if (fd == -1) { -- return (void)fuse_reply_err(req, 
errno); -- } -+ err = fd == -1 ? errno : 0; -+ lo_restore_cred(&old); - -- fi->fh = fd; -+ if (!err) { -+ fi->fh = fd; -+ err = lo_do_lookup(req, parent, name, &e); -+ } - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } - -- err = lo_do_lookup(req, parent, name, &e); -+out: - if (err) { - fuse_reply_err(req, err); - } else { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch deleted file mode 100644 index 4a531a3..0000000 --- a/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +++ /dev/null @@ -1,50 +0,0 @@ -From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:46 +0100 -Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on - cache=never -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-72-dgilbert@redhat.com> -Patchwork-id: 93525 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -...because the attributes sent in the READDIRPLUS reply would be discarded -anyway. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 18d69ab..6480c51 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ if (lo->cache == CACHE_NEVER) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -+ conn->want &= ~FUSE_CAP_READDIRPLUS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch deleted file mode 100644 index 00e11b4..0000000 --- a/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:10 +0100 -Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on - remove/rename -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-96-dgilbert@redhat.com> -Patchwork-id: 93549 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c819b5f..e3a6d6b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1140,17 +1140,42 @@ out_err: - fuse_reply_err(req, saverr); - } - -+static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct stat attr; -+ -+ res = fstatat(lo_fd(req, parent), name, &attr, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return NULL; -+ } -+ -+ return lo_find(lo_data(req), &attr); -+} -+ - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); -+ - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -+ struct lo_inode *oldinode; -+ struct lo_inode *newinode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ oldinode = lookup_name(req, parent, name); -+ newinode = lookup_name(req, newparent, newname); -+ -+ if (!oldinode) { -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ - if (flags) { - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - fuse_reply_err(req, res == -1 ? errno : 0); - } - #endif -- return; -+ goto out; - } - - res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - - fuse_reply_err(req, res == -1 ? errno : 0); -+out: -+ unref_inode_lolocked(lo, oldinode, 1); -+ unref_inode_lolocked(lo, newinode, 1); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/kvm-virtiofsd-passthrough_ll-use-hashtable.patch deleted file mode 100644 index b0be1f9..0000000 --- a/kvm-virtiofsd-passthrough_ll-use-hashtable.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:52 +0100 -Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-78-dgilbert@redhat.com> -Patchwork-id: 93528 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Improve performance of inode lookup by using a hash table. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ - 1 file changed, 45 insertions(+), 36 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b40f287..b176a31 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -84,13 +84,15 @@ struct lo_map { - ssize_t freelist; - }; - -+struct lo_key { -+ ino_t ino; -+ dev_t dev; -+}; -+ - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ - int fd; - bool is_symlink; -- ino_t ino; -- dev_t dev; -+ struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; - }; -@@ -119,7 +121,8 @@ struct lo_data { - int timeout_set; - int readdirplus_set; - int readdirplus_clear; -- struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_inode root; -+ GHashTable *inodes; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -@@ -573,7 +576,7 @@ retry: - } - goto fail_unref; - } -- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to match last\n", __func__); -@@ -753,19 +756,20 @@ out_err: - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { - struct lo_inode *p; -- struct lo_inode *ret = NULL; -+ struct lo_key key = { -+ .ino = st->st_ino, -+ .dev = st->st_dev, -+ }; - - pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- 
} -+ p = g_hash_table_lookup(lo->inodes, &key); -+ if (p) { -+ assert(p->refcount > 0); -+ p->refcount++; - } - pthread_mutex_unlock(&lo->mutex); -- return ret; -+ -+ return p; - } - - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- struct lo_inode *prev, *next; -- - saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { -@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->refcount = 1; - inode->fd = newfd; - newfd = -1; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -+ inode->key.ino = e->attr.st_ino; -+ inode->key.dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -+ g_hash_table_insert(lo->inodes, &inode->key, inode); - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - assert(inode->refcount >= n); - inode->refcount -= n; - if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- - lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ g_hash_table_remove(lo->inodes, &inode->key); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - /* Hide root's parent directory */ - if (dinode == &lo->root && strcmp(name, "..") == 0) { -- e.attr.st_ino = lo->root.ino; -+ e.attr.st_ino = lo->root.key.ino; - e.attr.st_mode = DT_DIR << 12; - } - -@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) - - root->is_symlink = false; - root->fd = fd; -- root->ino = stat.st_ino; -- root->dev = stat.st_dev; -+ root->key.ino = stat.st_ino; -+ root->key.dev = stat.st_dev; - root->refcount = 2; - } - -+static guint lo_key_hash(gconstpointer key) -+{ -+ const struct lo_key *lkey = key; -+ -+ return (guint)lkey->ino + (guint)lkey->dev; -+} -+ -+static gboolean lo_key_equal(gconstpointer a, gconstpointer b) -+{ -+ const struct lo_key *la = a; -+ const struct lo_key *lb = b; -+ -+ return la->ino == lb->ino && la->dev == lb->dev; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) - umask(0); - - pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -+ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_AUTO; -@@ -2522,6 +2528,9 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ if (lo.inodes) { -+ g_hash_table_destroy(lo.inodes); -+ } - lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch deleted file mode 100644 index 68eb03e..0000000 --- a/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +++ /dev/null @@ -1,54 +0,0 @@ -From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:24 +0100 -Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-50-dgilbert@redhat.com> -Patchwork-id: 93500 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e375406..79d5966 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -+ struct lo_inode *inode, *dir = lo_inode(req, parent); - - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; - -+ /* Do not allow escaping root directory */ -+ if (dir == &lo->root && strcmp(name, "..") == 0) { -+ name = "."; -+ } -+ - newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch deleted file mode 100644 index 5f97cbf..0000000 --- a/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:25 +0100 -Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-51-dgilbert@redhat.com> -Patchwork-id: 93507 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Construct a fake dirent for the root directory's ".." entry. This hides -the parent directory from the FUSE client. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- - 1 file changed, 22 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79d5966..e3d65c3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1149,19 +1149,25 @@ out_err: - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -+ struct lo_data *lo = lo_data(req); - struct lo_dirp *d; -+ struct lo_inode *dinode; - char *buf = NULL; - char *p; - size_t rem = size; -- int err = ENOMEM; -+ int err = EBADF; - -- (void)ino; -+ dinode = lo_inode(req, ino); -+ if (!dinode) { -+ goto error; -+ } - - d = lo_dirp(req, fi); - if (!d) { - goto error; - } - -+ err = ENOMEM; - buf = calloc(1, size); - if (!buf) { - goto error; -@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - } - nextoff = d->entry->d_off; - name = d->entry->d_name; -+ - fuse_ino_t entry_ino = 0; -+ struct fuse_entry_param e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ -+ /* Hide root's parent directory */ -+ if (dinode == &lo->root && strcmp(name, "..") == 0) { -+ e.attr.st_ino = 
lo->root.ino; -+ e.attr.st_mode = DT_DIR << 12; -+ } -+ - if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param){ -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -+ if (!is_dot_or_dotdot(name)) { - err = lo_do_lookup(req, ino, name, &e); - if (err) { - goto error; -@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); - } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); - } - if (entsize > rem) { - if (entry_ino != 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch deleted file mode 100644 index be7c120..0000000 --- a/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:20 +0100 -Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-106-dgilbert@redhat.com> -Patchwork-id: 93562 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -When running with multiple threads it can be tricky to handle -FUSE_INIT/FUSE_DESTROY in parallel with other request types or in -parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that -malicious clients cannot trigger race conditions. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index a20854f..1447d86 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -61,6 +61,7 @@ struct fuse_session { - struct fuse_req list; - struct fuse_req interrupts; - pthread_mutex_t lock; -+ pthread_rwlock_t init_rwlock; - int got_destroy; - int broken_splice_nonblock; - uint64_t notify_ctr; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index dab6a31..79a4031 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, - req->ctx.pid = in->pid; - req->ch = ch; - -+ /* -+ * INIT and DESTROY requests are serialized, all other request types -+ * run in parallel. This prevents races between FUSE_INIT and ordinary -+ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and -+ * FUSE_DESTROY and FUSE_DESTROY. 
-+ */ -+ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || -+ in->opcode == FUSE_DESTROY) { -+ pthread_rwlock_wrlock(&se->init_rwlock); -+ } else { -+ pthread_rwlock_rdlock(&se->init_rwlock); -+ } -+ - err = EIO; - if (!se->got_init) { - enum fuse_opcode expected; -@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -+ -+ pthread_rwlock_unlock(&se->init_rwlock); - return; - - reply_err: - fuse_reply_err(req, err); -+ pthread_rwlock_unlock(&se->init_rwlock); - } - - #define LL_OPTION(n, o, v) \ -@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) - se->op.destroy(se->userdata); - } - } -+ pthread_rwlock_destroy(&se->init_rwlock); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) { -@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - list_init_req(&se->list); - list_init_req(&se->interrupts); - fuse_mutex_init(&se->lock); -+ pthread_rwlock_init(&se->init_rwlock, NULL); - - memcpy(&se->op, op, op_size); - se->owner = getuid(); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch deleted file mode 100644 index 8eabede..0000000 --- a/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:05 +0100 -Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() - races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-91-dgilbert@redhat.com> -Patchwork-id: 93544 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -We call into libvhost-user from the virtqueue handler thread and the -vhost-user message processing thread without a lock. There is nothing -protecting the virtqueue handler thread if the vhost-user message -processing thread changes the virtqueue or memory table while it is -running. - -This patch introduces a read-write lock. Virtqueue handler threads are -readers. The vhost-user message processing thread is a writer. This -will allow concurrency for multiqueue in the future while protecting -against fv_queue_thread() vs virtio_loop() races. - -Note that the critical sections could be made smaller but it would be -more invasive and require libvhost-user changes. Let's start simple and -improve performance later, if necessary. Another option would be an -RCU-style approach with lighter-weight primitives. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- - 1 file changed, 33 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fb8d6d1..f6242f9 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -59,6 +59,18 @@ struct fv_VuDev { - struct fuse_session *se; - - /* -+ * Either handle virtqueues or vhost-user protocol messages. Don't do -+ * both at the same time since that could lead to race conditions if -+ * virtqueues or memory tables change while another thread is accessing -+ * them. 
-+ * -+ * The assumptions are: -+ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. -+ * 2. virtio_loop() reads/writes virtqueues and VuDev. -+ */ -+ pthread_rwlock_t vu_dispatch_rwlock; -+ -+ /* - * The following pair of fields are only accessed in the main - * virtio_loop - */ -@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) - qi->qidx, qi->kick_fd); - while (1) { - struct pollfd pf[2]; -+ int ret; -+ - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -+ /* Mutual exclusion with virtio_loop() */ -+ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ assert(ret == 0); /* there is no possible error case */ - /* out is from guest, in is too guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -+ - while (1) { - bool allocated_bufv = false; - struct fuse_bufvec bufv; -@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) - free(elem); - elem = NULL; - } -+ -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } - out: - pthread_mutex_destroy(&ch.lock); -@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) - - while (!fuse_session_exited(se)) { - struct pollfd pf[1]; -+ bool ok; -+ int ret; - pf[0].fd = se->vu_socketfd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) - } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -- if (!vu_dispatch(&se->virtio_dev->dev)) { -+ /* Mutual exclusion with fv_queue_thread() */ -+ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); -+ 
assert(ret == 0); /* there is no possible error case */ -+ -+ ok = vu_dispatch(&se->virtio_dev->dev); -+ -+ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); -+ -+ if (!ok) { - fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); - break; - } -@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) - - se->vu_socketfd = data_sock; - se->virtio_dev->se = se; -+ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); - vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, - fv_remove_watch, &fv_iface); - -@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) - } - - free(se->virtio_dev->qi); -+ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch deleted file mode 100644 index acafa41..0000000 --- a/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:07 +0100 -Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-93-dgilbert@redhat.com> -Patchwork-id: 93546 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause -use-after-free races with other threads that are accessing lo_dirp. - -Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with -itself. This prevents double-frees. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ - 1 file changed, 35 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 690edbc..2d703b5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - } - - struct lo_dirp { -+ gint refcount; - DIR *dp; - struct dirent *entry; - off_t offset; - }; - -+static void lo_dirp_put(struct lo_dirp **dp) -+{ -+ struct lo_dirp *d = *dp; -+ -+ if (!d) { -+ return; -+ } -+ *dp = NULL; -+ -+ if (g_atomic_int_dec_and_test(&d->refcount)) { -+ closedir(d->dp); -+ free(d); -+ } -+} -+ -+/* Call lo_dirp_put() on the return value when no longer needed */ - static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (elem) { -+ g_atomic_int_inc(&elem->dirp->refcount); -+ } - pthread_mutex_unlock(&lo->mutex); - if (!elem) { - return NULL; -@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -+ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ - pthread_mutex_lock(&lo->mutex); - fh = lo_add_dirp_mapping(req, d); - pthread_mutex_unlock(&lo->mutex); -@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { - struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -+ struct lo_dirp *d = NULL; - struct lo_inode 
*dinode; - char *buf = NULL; - char *p; -@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - err = 0; - error: -+ lo_dirp_put(&d); -+ - /* - * If there's an error, we can only signal it if we haven't stored - * any entries yet - otherwise we'd end up with wrong lookup -@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; - struct lo_dirp *d; - - (void)ino; - -- d = lo_dirp(req, fi); -- if (!d) { -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (!elem) { -+ pthread_mutex_unlock(&lo->mutex); - fuse_reply_err(req, EBADF); - return; - } - -- pthread_mutex_lock(&lo->mutex); -+ d = elem->dirp; - lo_map_remove(&lo->dirp_map, fi->fh); - pthread_mutex_unlock(&lo->mutex); - -- closedir(d->dp); -- free(d); -+ lo_dirp_put(&d); /* paired with lo_opendir() */ -+ - fuse_reply_err(req, 0); - } - -@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - } else { - res = fsync(fd); - } -+ -+ lo_dirp_put(&d); -+ - fuse_reply_err(req, res == -1 ? errno : 0); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch deleted file mode 100644 index 056559d..0000000 --- a/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +++ /dev/null @@ -1,469 +0,0 @@ -From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:37 +0100 -Subject: [PATCH 066/116] virtiofsd: print log only when priority is high - enough -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-63-dgilbert@redhat.com> -Patchwork-id: 93518 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Introduce "-o log_level=" command line option to specify current log -level (priority), valid values are "debug info warn err", e.g. - - ./virtiofsd -o log_level=debug ... - -So only log priority higher than "debug" will be printed to -stderr/syslog. And the default level is info. - -The "-o debug"/"-d" options are kept, and imply debug log level. - -Signed-off-by: Eryu Guan -dgilbert: Reworked for libfuse's log_func -Signed-off-by: Dr. David Alan Gilbert -with fix by: -Signed-off-by: Xiao Yang -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 8 ++- - tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- - 4 files changed, 87 insertions(+), 115 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 6ceb33d..a7a1968 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct fuse_out_header *out = iov[0].iov_base; - - out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -- out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long)out->unique, out->error, -- strerror(-out->error), 
out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -- (unsigned long long)out->unique, out->len); -- } -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); - } - - if (fuse_lowlevel_is_virtio(se)) { -@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, - return; - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long)arg->unique); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); - - req->u.i.unique = arg->unique; - -@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); - } - se->conn.proto_major = arg->major; - se->conn.proto_minor = arg->minor; -@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - outarg.congestion_threshold = se->conn.congestion_threshold; - outarg.time_gran = se->conn.time_gran; - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -- outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); -- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -- } -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - - send_reply_ok(req, &outarg, outargsize); - } -@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); - assert(in); /* caller guarantees the input buffer is large enough */ - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -- "pid: %u\n", -- (unsigned long long)in->unique, -- opname((enum fuse_opcode)in->opcode), in->opcode, -- (unsigned long long)in->nodeid, buf->size, in->pid); -- } -+ fuse_log( -+ FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), -+ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); - - req = fuse_ll_alloc_req(se); - if (req == NULL) { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f2750bc..138041e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { - int show_help; - 
int print_capabilities; - int syslog; -+ int log_level; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9692ef9..6d50a46 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -34,7 +34,6 @@ - t, offsetof(struct fuse_cmdline_opts, p), v \ - } - -- - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), - FUSE_HELPER_OPT("--help", show_help), -@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_HELPER_OPT("--syslog", syslog), -+ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), -+ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), -+ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), -+ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), - FUSE_OPT_END - }; - -@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o log_level= log level, default to \"info\"\n" -+ " level could be one of \"debug, " -+ "info, warn, err\"\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0372aca..ff6910f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -37,6 +37,7 @@ - - #include "qemu/osdep.h" - #include "fuse_virtio.h" -+#include "fuse_log.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { - FUSE_OPT_END - }; - static bool use_syslog = false; -+static int current_log_level; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) - return inode ? inode->fd : -1; - } - --static bool lo_debug(fuse_req_t req) --{ -- return lo_data(req)->debug != 0; --} -- - static void lo_init(void *userdata, struct fuse_conn_info *conn) - { - struct lo_data *lo = (struct lo_data *)userdata; -@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - } - - if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } - if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } - } -@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - e->ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e->ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e->ino); - - return 0; - -@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - struct fuse_entry_param e; - int err; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - /* - * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export -@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - return; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -- (unsigned long long)nlookup); -- } -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); - - unref_inode(lo, inode, nlookup); - } -@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - int err; - struct lo_cred old = {}; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); -@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - char buf[64]; - struct lo_data *lo = lo_data(req); - -- if (lo_debug(req)) { -- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -- fi->flags); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); - - /* - * With writeback cache, kernel may send read requests even -@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - { - struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", -- ino, size, (unsigned long)offset); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -- out_buf.buf[0].size, (unsigned long)off); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); - - /* - * If kill_priv is set, drop CAP_FSETID which should lead to kernel -@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -- size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to getxattr on symlink. 
*/ -@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, -+ size); - - if (inode->is_symlink) { - /* Sorry, no race free way to listxattr on symlink. */ -@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 -+ ", name=%s value=%s size=%zd)\n", ino, name, value, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, -+ name); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ if (current_log_level < level) { -+ return; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { -@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) - return 1; - } - -+ /* -+ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, -+ * and we don't use this log level). 
-+ */ -+ if (opts.log_level != 0) { -+ current_log_level = opts.log_level; -+ } - lo.debug = opts.debug; -+ if (lo.debug) { -+ current_log_level = FUSE_LOG_DEBUG; -+ } - lo.root.refcount = 2; -+ - if (lo.source) { - struct stat stat; - int res; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/kvm-virtiofsd-process-requests-in-a-thread-pool.patch deleted file mode 100644 index 87fff99..0000000 --- a/kvm-virtiofsd-process-requests-in-a-thread-pool.patch +++ /dev/null @@ -1,533 +0,0 @@ -From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:19 +0100 -Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-105-dgilbert@redhat.com> -Patchwork-id: 93554 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce a thread pool so that fv_queue_thread() just pops -VuVirtqElements and hands them to the thread pool. For the time being -only one worker thread is allowed since passthrough_ll.c is not -thread-safe yet. Future patches will lift this restriction so that -multiple FUSE requests can be processed in parallel. - -The main new concept is struct FVRequest, which contains both -VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, -fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of -fv_QueueInfo's fields are moved into FVRequest because they are -per-request. The name FVRequest conforms to QEMU coding style and I -expect the struct fv_* types will be renamed in a future refactoring. - -This patch series is not optimal. 
fbuf reuse is dropped so each request -does malloc(se->bufsize), but there is no clean and cheap way to keep -this with a thread pool. The vq_lock mutex is held for longer than -necessary, especially during the eventfd_write() syscall. Performance -can be improved in the future. - -prctl(2) had to be added to the seccomp whitelist because glib invokes -it. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- - 1 file changed, 201 insertions(+), 158 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f6242f9..0dcf2ef 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -22,6 +22,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -37,17 +38,28 @@ - struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; -+ /* -+ * This lock protects the VuVirtq preventing races between -+ * fv_queue_thread() and fv_queue_worker(). 
-+ */ -+ pthread_mutex_t vq_lock; -+ - struct fv_VuDev *virtio_dev; - - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; - int kill_fd; /* For killing the thread */ -+}; - -- /* The element for the command currently being processed */ -- VuVirtqElement *qe; -+/* A FUSE request */ -+typedef struct { -+ VuVirtqElement elem; -+ struct fuse_chan ch; -+ -+ /* Used to complete requests that involve no reply */ - bool reply_sent; --}; -+} FVRequest; - - /* - * We pass the dev element into libvhost-user -@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count) - { -- VuVirtqElement *elem; -- VuVirtq *q; -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; - - assert(count >= 1); -@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - - /* unique == 0 is notification, which we don't support */ - assert(out->unique); -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -- ch->qi->reply_sent = true; -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ -+ req->reply_sent = true; - - err: - return ret; -@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count, struct fuse_bufvec *buf, - size_t len) - { -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; -- VuVirtqElement *elem; -- VuVirtq *q; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - /* unique == 0 is notification which we don't support */ - assert(out->unique); - -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - ret = 0; - -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - - err: - if (ret == 0) { -- ch->qi->reply_sent = true; -+ req->reply_sent = true; - } - - return ret; - } - -+/* Process one FVRequest in a thread pool */ -+static void fv_queue_worker(gpointer data, gpointer user_data) -+{ -+ struct fv_QueueInfo *qi = user_data; -+ struct fuse_session *se = qi->virtio_dev->se; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ FVRequest *req = data; -+ VuVirtqElement *elem = &req->elem; -+ 
struct fuse_buf fbuf = {}; -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ -+ /* -+ * An element contains one request and the space to send our response -+ * They're spread over multiple descriptors in a scatter/gather set -+ * and we can't trust the guest to keep them still; so copy in/out. -+ */ -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ -+ fuse_mutex_init(&req->ch.lock); -+ req->ch.fd = -1; -+ req->ch.qi = qi; -+ -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", -+ __func__, elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, -+ elem->index); -+ assert(0); /* TODO */ -+ } -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ pbufv = NULL; /* Compiler thinks an unitialised path */ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. 
-+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header afte rthe fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ allocated_bufv = true; -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; -+ -+ /* TODO! 
Endianness of header */ -+ -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &req->ch); -+ -+out: -+ if (allocated_bufv) { -+ free(pbufv); -+ } -+ -+ /* If the request has no reply, still recycle the virtqueue element */ -+ if (!req->reply_sent) { -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, -+ elem->index); -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ } -+ -+ pthread_mutex_destroy(&req->ch.lock); -+ free(fbuf.mem); -+ free(req); -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -- struct fuse_session *se = qi->virtio_dev->se; -- struct fuse_chan ch; -- struct fuse_buf fbuf; -+ GThreadPool *pool; - -- fbuf.mem = NULL; -- fbuf.flags = 0; -- -- fuse_mutex_init(&ch.lock); -- ch.fd = (int)0xdaff0d111; -- ch.qi = qi; -+ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -+ TRUE, NULL); -+ if (!pool) { -+ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); -+ return NULL; -+ } - - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); -@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) - /* Mutual exclusion with virtio_loop() */ - ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); - assert(ret == 0); /* there is no possible error case */ -+ pthread_mutex_lock(&qi->vq_lock); - /* out is from guest, in is too 
guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -- - while (1) { -- bool allocated_bufv = false; -- struct fuse_bufvec bufv; -- struct fuse_bufvec *pbufv; -- -- /* -- * An element contains one request and the space to send our -- * response They're spread over multiple descriptors in a -- * scatter/gather set and we can't trust the guest to keep them -- * still; so copy in/out. -- */ -- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -- if (!elem) { -+ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); -+ if (!req) { - break; - } - -- qi->qe = elem; -- qi->reply_sent = false; -+ req->reply_sent = false; - -- if (!fbuf.mem) { -- fbuf.mem = malloc(se->bufsize); -- assert(fbuf.mem); -- assert(se->bufsize > sizeof(struct fuse_in_header)); -- } -- /* The 'out' part of the elem is from qemu */ -- unsigned int out_num = elem->out_num; -- struct iovec *out_sg = elem->out_sg; -- size_t out_len = iov_size(out_sg, out_num); -- fuse_log(FUSE_LOG_DEBUG, -- "%s: elem %d: with %d out desc of length %zd\n", __func__, -- elem->index, out_num, out_len); -- -- /* -- * The elem should contain a 'fuse_in_header' (in to fuse) -- * plus the data based on the len in the header. 
-- */ -- if (out_len < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- if (out_len > se->bufsize) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- /* Copy just the first element and look at it */ -- copy_from_iov(&fbuf, 1, out_sg); -- -- if (out_num > 2 && -- out_sg[0].iov_len == sizeof(struct fuse_in_header) && -- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -- /* -- * For a write we don't actually need to copy the -- * data, we can just do it straight out of guest memory -- * but we must still copy the headers in case the guest -- * was nasty and changed them while we were using them. -- */ -- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -- -- /* copy the fuse_write_in header after the fuse_in_header */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -- -- /* Allocate the bufv, with space for the rest of the iov */ -- allocated_bufv = true; -- pbufv = malloc(sizeof(struct fuse_bufvec) + -- sizeof(struct fuse_buf) * (out_num - 2)); -- if (!pbufv) { -- vu_queue_unpop(dev, q, elem, 0); -- free(elem); -- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -- __func__); -- goto out; -- } -- -- pbufv->count = 1; -- pbufv->buf[0] = fbuf; -- -- size_t iovindex, pbufvindex; -- iovindex = 2; /* 2 headers, separate iovs */ -- pbufvindex = 1; /* 2 headers, 1 fusebuf */ -- -- for (; iovindex < out_num; iovindex++, pbufvindex++) { -- pbufv->count++; -- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -- pbufv->buf[pbufvindex].flags = 0; -- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -- } -- } else { -- /* Normal (non fast write) 
path */ -- -- /* Copy the rest of the buffer */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_len; -- -- /* TODO! Endianness of header */ -- -- /* TODO: Add checks for fuse_session_exited */ -- bufv.buf[0] = fbuf; -- bufv.count = 1; -- pbufv = &bufv; -- } -- pbufv->idx = 0; -- pbufv->off = 0; -- fuse_session_process_buf_int(se, pbufv, &ch); -- -- if (allocated_bufv) { -- free(pbufv); -- } -- -- if (!qi->reply_sent) { -- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -- __func__, elem->index); -- /* I think we've still got to recycle the element */ -- vu_queue_push(dev, q, elem, 0); -- vu_queue_notify(dev, q); -- } -- qi->qe = NULL; -- free(elem); -- elem = NULL; -+ g_thread_pool_push(pool, req, NULL); - } - -+ pthread_mutex_unlock(&qi->vq_lock); - pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } --out: -- pthread_mutex_destroy(&ch.lock); -- free(fbuf.mem); -+ -+ g_thread_pool_free(pool, FALSE, TRUE); - - return NULL; - } -@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", - __func__, qidx, ret); - } -+ pthread_mutex_destroy(&ourqi->vq_lock); - close(ourqi->kill_fd); - ourqi->kick_fd = -1; - free(vud->qi[qidx]); -@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - - ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); - assert(ourqi->kill_fd != -1); -+ pthread_mutex_init(&ourqi->vq_lock, NULL); -+ - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch deleted file mode 100644 index 181e32d..0000000 --- a/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +++ /dev/null @@ 
-1,159 +0,0 @@ -From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:46 +0100 -Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-12-dgilbert@redhat.com> -Patchwork-id: 93466 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Classic FUSE file system daemons take a mountpoint argument but -virtiofsd exposes a vhost-user UNIX domain socket instead. The -mountpoint argument is not used by virtiofsd but the user is still -required to pass a dummy argument on the command-line. - -Remove the mountpoint argument to clean up the command-line. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - tools/virtiofsd/fuse_lowlevel.h | 4 +--- - tools/virtiofsd/helper.c | 20 +++----------------- - tools/virtiofsd/passthrough_ll.c | 12 ++---------- - 4 files changed, 7 insertions(+), 31 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5c9cb52..2f32c68 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2455,7 +2455,7 @@ out1: - return NULL; - } - --int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+int fuse_session_mount(struct fuse_session *se) - { - int fd; - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index adb9054..8d8909b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { - int foreground; - int debug; - int nodefault_subtype; -- char *mountpoint; - int show_version; - int show_help; - unsigned int max_idle_threads; -@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /** - * Mount a FUSE file system. - * -- * @param mountpoint the mount point path - * @param se session object - * - * @return 0 on success, -1 on failure. - **/ --int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+int fuse_session_mount(struct fuse_session *se); - - /** - * Enter a single threaded, blocking event loop. 
-diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5711dd2..5e6f205 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) - static int fuse_helper_opt_proc(void *data, const char *arg, int key, - struct fuse_args *outargs) - { -+ (void)data; - (void)outargs; -- struct fuse_cmdline_opts *opts = data; - - switch (key) { - case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -- strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; - - default: - /* Pass through unknown options */ -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c5850ef..9377718 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) - return 1; - } - if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -+ printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); - fuse_lowlevel_help(); - ret = 0; -@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) - goto err_out1; - } - -- if (opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { - return 1; - } -@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) - goto err_out2; - } - -- if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ if (fuse_session_mount(se) != 0) { - goto err_out3; - } - -@@ -1393,7 
+1386,6 @@ err_out3: - err_out2: - fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); - fuse_opt_free_args(&args); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/kvm-virtiofsd-remove-unused-notify-reply-support.patch deleted file mode 100644 index 98fb968..0000000 --- a/kvm-virtiofsd-remove-unused-notify-reply-support.patch +++ /dev/null @@ -1,294 +0,0 @@ -From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:47 +0100 -Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-13-dgilbert@redhat.com> -Patchwork-id: 93467 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Notify reply support is unused by virtiofsd. The code would need to be -updated to validate input buffer sizes. Remove this unused code since -changes to it are untestable. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 47 ------------- - 2 files changed, 1 insertion(+), 193 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2f32c68..eb0ec49 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -31,12 +31,6 @@ - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) \ -- ({ \ -- const typeof(((type *)0)->member) *__mptr = (ptr); \ -- (type *)((char *)__mptr - offsetof(type, member)); \ -- }) -- - struct fuse_pollhandle { - uint64_t kh; - struct fuse_session *se; -@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, NULL, 0); - } - --static void list_del_nreq(struct fuse_notify_req *nreq) --{ -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; --} -- --static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) --{ -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; --} -- --static void list_init_nreq(struct fuse_notify_req *nreq) --{ -- nreq->next = nreq; -- nreq->prev = nreq; --} -- --static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) --{ -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -- -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } 
-- pthread_mutex_unlock(&se->lock); -- -- if (nreq != head) { -- nreq->reply(nreq, req, nodeid, inarg, buf); -- } --} -- - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return res; - } - --struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; --}; -- --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -- fuse_ino_t ino, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } --out: -- free(rreq); --} -- --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) --{ -- struct fuse_notify_retrieve_out outarg; -- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -- -- if (!se) { -- return -EINVAL; -- } -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) { -- return -ENOMEM; -- } -- -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = 
fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -- -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -- -- return err; --} -- - void *fuse_req_userdata(fuse_req_t req) - { - return req->se->userdata; -@@ -2226,7 +2085,7 @@ static struct { - [FUSE_POLL] = { do_poll, "POLL" }, - [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, - [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, - [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, - [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, inarg, buf); -- } else if (in->opcode == FUSE_NOTIFY_REPLY) { -- do_notify_reply(req, in->nodeid, inarg, buf); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - - list_init_req(&se->list); - list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - - memcpy(&se->op, op, op_size); -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 8d8909b..12a84b4 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { - off_t off, struct fuse_file_info *fi); - - 
/** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** - * Forget about multiple inodes - * - * See description of the forget function for more -@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags); --/** -- * Retrieve data from the kernel buffers -- * -- * Retrieve data in the kernel buffers belonging to the given inode. -- * If successful then the retrieve_reply() method will be called with -- * the returned data. -- * -- * Only present pages are returned in the retrieve reply. Retrieving -- * stops when it finds a non-present page and only data prior to that -- * is returned. -- * -- * If this function returns an error, then the retrieve will not be -- * completed and no reply will be sent. -- * -- * This function doesn't change the dirty state of pages in the kernel -- * buffer. For dirty pages the write() method will be called -- * regardless of having been retrieved previously. -- * -- * Added in FUSE protocol version 7.15. If the kernel does not support -- * this (or a newer) version, the function will return -ENOSYS and do -- * nothing. 
-- * -- * @param se the session object -- * @param ino the inode number -- * @param size the number of bytes to retrieve -- * @param offset the starting offset into the file to retrieve from -- * @param cookie user data to supply to the reply callback -- * @return zero for success, -errno for failure -- */ --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -- - - /* - * Utility functions --- -1.8.3.1 - diff --git a/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch deleted file mode 100644 index 97a0db3..0000000 --- a/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +++ /dev/null @@ -1,139 +0,0 @@ -From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:08 +0100 -Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-94-dgilbert@redhat.com> -Patchwork-id: 93547 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -This reference counter plays a specific role in the FUSE protocol. It's -not a generic object reference counter and the FUSE kernel code calls it -"nlookup". - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 2d703b5..c819b5f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -99,7 +99,20 @@ struct lo_inode { - int fd; - bool is_symlink; - struct lo_key key; -- uint64_t refcount; /* protected by lo->mutex */ -+ -+ /* -+ * This counter keeps the inode alive during the FUSE session. -+ * Incremented when the FUSE inode number is sent in a reply -+ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is -+ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. -+ * -+ * Note that this value is untrusted because the client can manipulate -+ * it arbitrarily using FUSE_FORGET requests. -+ * -+ * Protected by lo->mutex. 
-+ */ -+ uint64_t nlookup; -+ - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -@@ -568,7 +581,7 @@ retry: - if (last == path) { - p = &lo->root; - pthread_mutex_lock(&lo->mutex); -- p->refcount++; -+ p->nlookup++; - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - pthread_mutex_lock(&lo->mutex); - p = g_hash_table_lookup(lo->inodes, &key); - if (p) { -- assert(p->refcount > 0); -- p->refcount++; -+ assert(p->nlookup > 0); -+ p->nlookup++; - } - pthread_mutex_unlock(&lo->mutex); - -@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -+ inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; - inode->key.ino = e->attr.st_ino; -@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - } - - pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -+ inode->nlookup++; - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - - pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -+ assert(inode->nlookup >= n); -+ inode->nlookup -= n; -+ if (!inode->nlookup) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); - if (g_hash_table_size(inode->posix_locks)) { -@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - struct lo_inode *inode = value; - struct lo_data *lo = user_data; - -- inode->refcount = 0; -+ inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); - -@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) - } - - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)ino, (unsigned long long)inode->nlookup, - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -- root->refcount = 2; -+ root->nlookup = 2; - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch deleted file mode 100644 index 95858f8..0000000 --- a/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +++ /dev/null @@ -1,94 +0,0 @@ -From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:48 +0100 -Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to - unref_inode_lolocked() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-74-dgilbert@redhat.com> -Patchwork-id: 93526 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 8b1784f..de12e75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { - }; - static bool use_syslog = false; - static int current_log_level; -- --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n); - - static struct { - pthread_mutex_t mutex; -@@ -586,7 +586,7 @@ retry: - return 0; - - fail_unref: -- unref_inode(lo, p, 1); -+ unref_inode_lolocked(lo, p, 1); - fail: - if (retries) { - retries--; -@@ -624,7 +624,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1027,7 +1027,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) - { - if (!inode) { - return; -@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)ino, (unsigned long long)inode->refcount, - (unsigned long long)nlookup); - -- unref_inode(lo, inode, nlookup); -+ unref_inode_lolocked(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-sandbox-mount-namespace.patch b/kvm-virtiofsd-sandbox-mount-namespace.patch deleted file mode 100644 index ab6f751..0000000 --- a/kvm-virtiofsd-sandbox-mount-namespace.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:27 +0100 -Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-53-dgilbert@redhat.com> -Patchwork-id: 93504 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Use a mount namespace with the shared directory tree mounted at "/" and -no other mounts. - -This prevents symlink escape attacks because symlink targets are -resolved only against the shared directory and cannot go outside it. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Peng Tao -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e2e2211..0570453 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1943,6 +1944,58 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* This magic is based on lxc's lxc_pivot_root() */ -+static void setup_pivot_root(const char *source) -+{ -+ int oldroot; -+ int newroot; -+ -+ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (oldroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -+ exit(1); -+ } -+ -+ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (newroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ if (syscall(__NR_pivot_root, ".", ".") < 0) { -+ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(oldroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); -+ exit(1); -+ } -+ -+ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); -+ exit(1); -+ } -+ -+ if (umount2(".", MNT_DETACH) < 0) { -+ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ close(newroot); -+ close(oldroot); -+} -+ - static void setup_proc_self_fd(struct lo_data *lo) - { - lo->proc_self_fd = open("/proc/self/fd", O_PATH); -@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) - } - } - -+/* 
-+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. -+ */ -+static void setup_mount_namespace(const char *source) -+{ -+ if (unshare(CLONE_NEWNS) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ setup_pivot_root(source); -+} -+ -+/* -+ * Lock down this process to prevent access to other processes or files outside -+ * source directory. This reduces the impact of arbitrary code execution bugs. -+ */ -+static void setup_sandbox(struct lo_data *lo) -+{ -+ setup_mount_namespace(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) - } - - lo.root.fd = open(lo.source, O_PATH); -+ - if (lo.root.fd == -1) { - fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); - exit(1); -@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) - /* Must be after daemonize to get the right /proc/self/fd */ - setup_proc_self_fd(&lo); - -+ setup_sandbox(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch deleted file mode 100644 index e54248c..0000000 --- a/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:34 +0100 -Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-60-dgilbert@redhat.com> -Patchwork-id: 93516 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd can exceed the default open file descriptor limit easily on -most systems. Take advantage of the fact that it runs as root to raise -the limit. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index d53cb1e..c281d81 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -53,6 +53,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - setup_seccomp(); - } - -+/* Raise the maximum number of open file descriptors */ -+static void setup_nofile_rlimit(void) -+{ -+ const rlim_t max_fds = 1000000; -+ struct rlimit rlim; -+ -+ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+ -+ if (rlim.rlim_cur >= max_fds) { -+ return; /* nothing to do */ -+ } -+ -+ rlim.rlim_cur = max_fds; -+ rlim.rlim_max = max_fds; -+ -+ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ /* Ignore SELinux 
denials */ -+ if (errno == EPERM) { -+ return; -+ } -+ -+ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ setup_nofile_rlimit(); -+ - /* Must be before sandbox since it wants /proc */ - setup_capng(); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch deleted file mode 100644 index be6b244..0000000 --- a/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:25 +0100 -Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in - virtio_loop() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-111-dgilbert@redhat.com> -Patchwork-id: 93564 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE -request from VMM and shuts down virtqueues by calling fv_set_started(), -which joins fv_queue_thread() threads. So when virtio_loop() returns, -there should be no thread is still accessing data in fuse session and/or -virtio dev. - -But on abnormal exit, e.g. guest got killed for whatever reason, -vhost-user socket is closed and virtio_loop() breaks out the main loop -and returns to main(). 
But it's possible fv_queue_worker()s are still -working and accessing fuse session and virtio dev, which results in -crash or use-after-free. - -Fix it by stopping fv_queue_thread()s before virtio_loop() returns, -to make sure there's no-one could access fuse session and virtio dev. - -Reported-by: Qingming Su -Signed-off-by: Eryu Guan -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 9f65823..80a6e92 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) - } - } - -+ /* -+ * Make sure all fv_queue_thread()s quit on exit, as we're about to -+ * free virtio dev and fuse session, no one should access them anymore. -+ */ -+ for (int i = 0; i < se->virtio_dev->nqueues; i++) { -+ if (!se->virtio_dev->qi[i]) { -+ continue; -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); -+ fv_queue_cleanup_thread(se->virtio_dev, i); -+ } -+ - fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); - - return 0; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch deleted file mode 100644 index f595ffa..0000000 --- a/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:54 +0100 -Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file - timestamp -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-80-dgilbert@redhat.com> -Patchwork-id: 93535 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Jiufei Xue - -Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct -stat' which means support nanosecond resolution for the file timestamp -fields. - -Signed-off-by: Jiufei Xue -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) -Signed-off-by: Miroslav Rezanina ---- - configure | 16 ++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 1 + - 2 files changed, 17 insertions(+) - -diff --git a/configure b/configure -index 7831618..5120c14 100755 ---- a/configure -+++ b/configure -@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then - strchrnul=yes - fi - -+######################################### -+# check if we have st_atim -+ -+st_atim=no -+cat > $TMPC << EOF -+#include -+#include -+int main(void) { return offsetof(struct stat, st_atim); } -+EOF -+if compile_prog "" "" ; then -+ st_atim=yes -+fi -+ - ########################################## - # check if trace backend exists - -@@ -6919,6 +6932,9 @@ fi - if test "$strchrnul" = "yes" ; then - echo "HAVE_STRCHRNUL=y" >> $config_host_mak - fi -+if test "$st_atim" = "yes" ; then -+ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak -+fi - if test "$byteswap_h" = "yes" ; then - echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak - fi -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index f252baa..5c618ce 100644 ---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -7,6 +7,7 @@ - */ - - #include -+#include "config-host.h" - - /* - * Versioned symbols cannot be used in some cases because it --- -1.8.3.1 - diff --git 
a/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch deleted file mode 100644 index 1bae1bf..0000000 --- a/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:18 +0100 -Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace - fuse_buf_write for better performance -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-104-dgilbert@redhat.com> -Patchwork-id: 93558 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -fuse_buf_writev() only handles the normal write in which src is buffer -and dest is fd. Specially if src buffer represents guest physical -address that can't be mapped by the daemon process, IO must be bounced -back to the VMM to do it by fuse_buf_copy(). - -Signed-off-by: Jun Piao -Suggested-by: Dr. David Alan Gilbert -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- - 1 file changed, 18 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 37befeb..27c1377 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - --__attribute__((unused)) - static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, - struct fuse_bufvec *in_buf) - { -@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { -- size_t copied = 0; -+ size_t copied = 0, i; - - if (dstv == srcv) { - return fuse_buf_size(dstv); - } - -+ /* -+ * use writev to improve bandwidth when all the -+ * src buffers already mapped by the daemon -+ * process -+ */ -+ for (i = 0; i < srcv->count; i++) { -+ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { -+ break; -+ } -+ } -+ if ((i == srcv->count) && (dstv->count == 1) && -+ (dstv->idx == 0) && -+ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { -+ dstv->buf[0].pos += dstv->off; -+ return fuse_buf_writev(&dstv->buf[0], srcv); -+ } -+ - for (;;) { - const struct fuse_buf *src = fuse_bufvec_current(srcv); - const struct fuse_buf *dst = fuse_bufvec_current(dstv); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch deleted file mode 100644 index feffb5e..0000000 --- a/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:04 +0100 -Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in - fuse_session_destroy() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-90-dgilbert@redhat.com> -Patchwork-id: 93540 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -vu_socket_path is NULL when --fd=FDNUM was used. Use -fuse_lowlevel_is_virtio() instead. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 70568d2..dab6a31 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) - close(se->fd); - } - -- if (se->vu_socket_path) { -+ if (fuse_lowlevel_is_virtio(se)) { - virtio_session_close(se); -- free(se->vu_socket_path); -- se->vu_socket_path = NULL; - } - -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ - free(se); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch deleted file mode 100644 index f250ed7..0000000 --- a/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +++ /dev/null @@ -1,390 +0,0 @@ -From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:26 +0100 -Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-52-dgilbert@redhat.com> -Patchwork-id: 93506 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sandboxing will remove /proc from the mount namespace so we can no -longer build string paths into "/proc/self/fd/...". - -Keep an O_PATH file descriptor so we can still re-open fds via -/proc/self/fd. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- - 1 file changed, 103 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3d65c3..e2e2211 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -110,6 +110,9 @@ struct lo_data { - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -+ -+ /* An O_PATH file descriptor to /proc/self/fd/ */ -+ int proc_self_fd; - }; - - static const struct fuse_opt lo_opts[] = { -@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - int res; - - retry: -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); - -- res = readlink(procname, path, PATH_MAX); -+ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
- if (res < 0) { - fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); - goto fail_noretry; -@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - } - return res; - } -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return utimensat(AT_FDCWD, path, tv, 0); -+ return utimensat(lo->proc_self_fd, path, tv, 0); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = fchmod(fd, attr->st_mode); - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -+ sprintf(procname, "%i", ifd); -+ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); - } - if (res == -1) { - goto out_err; -@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - } - if (valid & FUSE_SET_ATTR_SIZE) { -+ int truncfd; -+ - if (fi) { -- res = ftruncate(fd, attr->st_size); -+ truncfd = fd; - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -+ sprintf(procname, "%i", ifd); -+ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (truncfd < 0) { -+ goto out_err; -+ } -+ } -+ -+ res = ftruncate(truncfd, attr->st_size); -+ if (!fi) { -+ saverr = errno; -+ close(truncfd); -+ errno = saverr; - } - if (res == -1) { - goto out_err; -@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - return res; - } - -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= 
~O_APPEND; - } - -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ sprintf(buf, "%i", lo_fd(req, ino)); -+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); - } -@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- (void)ino; - int fd; - char *buf; - -@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - (void *)fi); - - if (!fi) { -- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ struct lo_data *lo = lo_data(req); -+ -+ res = asprintf(&buf, "%i", lo_fd(req, ino)); - if (res == -1) { - return (void)fuse_reply_err(req, errno); - } - -- fd = open(buf, O_RDWR); -+ fd = openat(lo->proc_self_fd, buf, O_RDWR); - free(buf); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); -@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out_err; - } - -- ret = getxattr(procname, name, value, size); -+ ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, - - fuse_reply_buf(req, value, ret); - } else { -- ret = getxattr(procname, name, NULL, 0); -+ ret = fgetxattr(fd, name, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1655,11 +1681,13 @@ out: - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out_err; - } - -- ret = listxattr(procname, value, size); -+ ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - - fuse_reply_buf(req, value, ret); - } else { -- ret = listxattr(procname, NULL, 0); -+ ret = flistxattr(fd, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - 
inode = lo_inode(req, ino); - if (!inode) { -@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = setxattr(procname, name, value, size, flags); -+ ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = removexattr(procname, name); -+ ret = fremovexattr(fd, name); - saverr = ret == -1 ? 
errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - -@@ -1887,12 +1943,25 @@ static void print_capabilities(void) - printf("}\n"); - } - -+static void setup_proc_self_fd(struct lo_data *lo) -+{ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); - struct fuse_session *se; - struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_data lo = { -+ .debug = 0, -+ .writeback = 0, -+ .proc_self_fd = -1, -+ }; - struct lo_map_elem *root_elem; - int ret = -1; - -@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be after daemonize to get the right /proc/self/fd */ -+ setup_proc_self_fd(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - -@@ -2018,6 +2090,10 @@ err_out1: - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - -+ if (lo.proc_self_fd >= 0) { -+ close(lo.proc_self_fd); -+ } -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch deleted file mode 100644 index d60a902..0000000 --- a/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:22 +0100 -Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in - do_write_buf() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-48-dgilbert@redhat.com> -Patchwork-id: 93501 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -There is a small change in behavior: if fuse_write_in->size doesn't -match the input buffer size then the request is failed. Previously -write requests with 1 fuse_buf element would truncate to -fuse_write_in->size. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- - 1 file changed, 30 insertions(+), 19 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7e10995..611e8b0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- struct fuse_bufvec *ibufv) -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; - struct fuse_bufvec *pbufv = ibufv; -@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - .buf[0] = ibufv->buf[0], - .count = 1, - }; -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; -+ size_t arg_size = sizeof(*arg); - struct fuse_file_info fi; - - memset(&fi, 0, sizeof(fi)); -+ -+ arg = fuse_mbuf_iter_advance(iter, arg_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ fi.lock_owner = 
arg->lock_owner; -+ fi.flags = arg->flags; - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - - if (ibufv->count == 1) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- tmpbufv.buf[0].mem = PARAM(arg); -- } -- tmpbufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -- if (tmpbufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- tmpbufv.buf[0].size = arg->size; -+ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; -+ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; - pbufv = &tmpbufv; - } else { - /* -@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - ibufv->buf[0].size = 0; - } - -+ if (fuse_buf_size(pbufv) != arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - -@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, - struct fuse_chan *ch) - { - const struct fuse_buf *buf = bufv->buf; -+ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; - int err; - -- in = buf->mem; -+ /* The first buffer must be a memory buffer */ -+ assert(!(buf->flags & FUSE_BUF_IS_FD)); -+ -+ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); -+ assert(in); /* caller guarantees the input buffer is large enough */ - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, bufv); -+ do_write_buf(req, 
in->nodeid, &iter, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-validate-path-components.patch b/kvm-virtiofsd-validate-path-components.patch deleted file mode 100644 index b35aed7..0000000 --- a/kvm-virtiofsd-validate-path-components.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:18 +0100 -Subject: [PATCH 047/116] virtiofsd: validate path components -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-44-dgilbert@redhat.com> -Patchwork-id: 93498 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Several FUSE requests contain single path components. A correct FUSE -client sends well-formed path components but there is currently no input -validation in case something went wrong or the client is malicious. - -Refuse ".", "..", and paths containing '/' when we expect a path -component. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- - 1 file changed, 53 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ac380ef..e375406 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); -+} -+ -+/* Is `path` a single path component that is not "." or ".."? */ -+static int is_safe_path_component(const char *path) -+{ -+ if (strchr(path, '/')) { -+ return 0; -+ } -+ -+ return !is_dot_or_dotdot(path); -+} - - static struct lo_data *lo_data(fuse_req_t req) - { -@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - parent, name); - } - -+ /* -+ * Don't use is_safe_path_component(), allow "." and ".." for NFS export -+ * support. 
-+ */ -+ if (strchr(name, '/')) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_do_lookup(req, parent, name, &e); - if (err) { - fuse_reply_err(req, err); -@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - dir = lo_inode(req, parent); - if (!dir) { - fuse_reply_err(req, EBADF); -@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - struct fuse_entry_param e; - int saverr; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - inode = lo_inode(req, ino); - if (!inode) { - fuse_reply_err(req, EBADF); -@@ -904,6 +938,10 @@ out_err: - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int res; - -+ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (flags) { - fuse_reply_err(req, EINVAL); - return; -@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? errno : 0); -@@ -1093,12 +1141,6 @@ out_err: - fuse_reply_err(req, error); - } - --static int is_dot_or_dotdot(const char *name) --{ -- return name[0] == '.' && -- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); --} -- - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - parent, name); - } - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; --- -1.8.3.1 - diff --git a/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch deleted file mode 100644 index 20add81..0000000 --- a/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:52 +0100 -Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-18-dgilbert@redhat.com> -Patchwork-id: 93471 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure - script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) -2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. - -Signed-off-by: Xiao Yang -Signed-off-by: Dr. 
David Alan Gilbert - Merged from two of Xiao Yang's patches -(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 322a889..6c4da18 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - int err = EOPNOTSUPP; - (void)ino; - --#ifdef HAVE_FALLOCATE -+#ifdef CONFIG_FALLOCATE - err = fallocate(fi->fh, mode, offset, length); - if (err < 0) { - err = errno; - } - --#elif defined(HAVE_POSIX_FALLOCATE) -+#elif defined(CONFIG_POSIX_FALLOCATE) - if (mode) { - fuse_reply_err(req, EOPNOTSUPP); - return; --- -1.8.3.1 - diff --git a/kvm-xhci-recheck-slot-status.patch b/kvm-xhci-recheck-slot-status.patch deleted file mode 100644 index 8bcbc2c..0000000 --- a/kvm-xhci-recheck-slot-status.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:31 +0000 -Subject: [PATCH 5/5] xhci: recheck slot status -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-3-dgilbert@redhat.com> -Patchwork-id: 93345 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: Gerd Hoffmann - -Factor out slot status check into a helper function. Add an additional -check after completing transfers. This is needed in case a guest -queues multiple transfers in a row and a device unplug happens while -qemu processes them. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 -Signed-off-by: Gerd Hoffmann -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20200107083606.12393-1-kraxel@redhat.com -(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-xhci.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index d2b9744..646c78c 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, - xhci_kick_epctx(epctx, streamid); - } - -+static bool xhci_slot_ok(XHCIState *xhci, int slotid) -+{ -+ return (xhci->slots[slotid - 1].uport && -+ xhci->slots[slotid - 1].uport->dev && -+ xhci->slots[slotid - 1].uport->dev->attached); -+} -+ - static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - { - XHCIState *xhci = epctx->xhci; -@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - - /* If the device has been detached, but the guest has not noticed this - yet the 2 above checks will succeed, but we must NOT continue */ -- if (!xhci->slots[epctx->slotid - 1].uport || -- !xhci->slots[epctx->slotid - 1].uport->dev || -- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { - return; - } - -@@ -1987,6 +1992,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - } else { - xhci_fire_transfer(xhci, xfer, epctx); - } -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { -+ /* surprise removal -> stop processing */ -+ break; -+ } - if (xfer->complete) { - /* update ring dequeue ptr */ - xhci_set_ep_state(xhci, epctx, stctx, epctx->state); --- -1.8.3.1 - diff --git a/kvm-xics-Don-t-deassert-outputs.patch b/kvm-xics-Don-t-deassert-outputs.patch deleted file mode 100644 index 08ed724..0000000 --- a/kvm-xics-Don-t-deassert-outputs.patch +++ 
/dev/null @@ -1,52 +0,0 @@ -From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:12 +0000 -Subject: [PATCH 02/15] xics: Don't deassert outputs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-3-dgibson@redhat.com> -Patchwork-id: 93430 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The correct way to do this is to deassert the input pins on the CPU side. -This is the case since a previous change. - -Signed-off-by: Greg Kurz -Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/xics.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/intc/xics.c b/hw/intc/xics.c -index e7ac9ba..72c5dca 100644 ---- a/hw/intc/xics.c -+++ b/hw/intc/xics.c -@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) - icp->pending_priority = 0xff; - icp->mfrr = 0xff; - -- /* Make all outputs are deasserted */ -- qemu_set_irq(icp->output, 0); -- - if (kvm_irqchip_in_kernel()) { - Error *local_err = NULL; - --- -1.8.3.1 - diff --git a/kvm.modules b/kvm.modules deleted file mode 100644 index b9d9646..0000000 --- a/kvm.modules +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -case $(uname -m) in - ppc64) - grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 - - modprobe -b kvm >/dev/null 2>&1 - modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 - if [ "$opal" ]; then - modprobe -b kvm-hv >/dev/null 2>&1 - fi - ;; - s390x) - modprobe -b kvm >/dev/null 2>&1 && kvm=1 - ;; -esac - -exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4f9fc85..db9f5c6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -9,6 +9,7 @@ %global have_kvm_setup 0 %global have_memlock_limits 0 + %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif @@ -66,8 +67,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 4.2.0 -Release: 19%{?dist} +Version: 5.0.0 +Release: 0%{?dist}.wrb200506 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,7 +77,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-5.0.0.tar.xz # KSM control scripts Source4: ksm.service @@ -120,529 +121,7 @@ Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch -# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm -Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch -# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) -Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch -# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC -Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch -# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF -Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch -# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling -Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch -# For bz#1790844 - USB related fixes -Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch -# For bz#1790844 - USB related fixes -Patch28: kvm-xhci-recheck-slot-status.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch29: kvm-tcp_emu-Fix-oob-access.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in 
tcp_emu() [rhel-av-8.2.0] -Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch -# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train -Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch -# For bz#1725084 - aarch64: support dumping SVE registers -Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch36: kvm-vhost-coding-style-fix.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch44: 
kvm-virtiofsd-Add-passthrough_ll.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch45: kvm-virtiofsd-Trim-down-imported-files.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch55: kvm-virtiofsd-Add-options-for-virtio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) 
-Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch60: kvm-virtiofsd-get-set-features-callbacks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch61: kvm-virtiofsd-Start-queue-threads.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch64: kvm-virtiofsd-Send-replies-to-messages.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch65: kvm-virtiofsd-Keep-track-of-replies.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch67: kvm-virtiofsd-Fast-path-for-virtio-read.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch71: kvm-virtiofsd-add-print-capabilities-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch72: kvm-virtiofs-Add-maintainers-entry.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch74: kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch -# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch79: kvm-virtiofsd-validate-path-components.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch89: 
kvm-virtiofsd-move-to-an-empty-network-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch93: kvm-virtiofsd-cap-ng-helpers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch97: kvm-virtiofsd-add-syslog-command-line-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch101: kvm-virtiofsd-Handle-reinit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch102: kvm-virtiofsd-Handle-hard-reboot.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch104: 
kvm-vhost-user-Print-unexpected-slave-message-types.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch118: kvm-virtiofsd-fix-memory-leak-on-lo.source.patch -# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch119: kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch126: kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch127: kvm-virtiofsd-make-lo_release-atomic.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch133: 
kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch140: kvm-virtiofsd-process-requests-in-a-thread-pool.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch147: 
kvm-virtiofsd-add-some-options-to-the-help-message.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch149: kvm-xics-Don-t-deassert-outputs.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch -# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str -Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch -# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] -Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch -# For bz#1787444 - Broken postcopy migration with vTPM device -Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch161: 
kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch164: kvm-iotests-Support-job-complete-in-run_job.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch165: kvm-iotests-Create-VM.blockdev_create.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with 
dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch172: 
kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch174: kvm-backup-top-Begin-drain-earlier.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with 
one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest 
installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch -# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus -Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch -# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch182: kvm-util-add-slirp_fmt-helpers.patch -# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug 
virtio-net-pci device -Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch -# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON -Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch190: kvm-target-i386-add-a-ucode-rev-property.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch -# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading -Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch -# For bz#1794692 - Mirror block job stops making progress -Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch -# For bz#1794692 - Mirror block job stops making progress -Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu -Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute 
migrate_cancel) -Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch204: kvm-qemu-file-Don-t-do-IO-after-shutdown.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch206: kvm-migration-Create-migration_is_running.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch -# For bz#1797064 - virtiofsd: Fixes 
-Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch -# For bz#1797064 - virtiofsd: Fixes -Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch -# For bz#1797064 - virtiofsd: Fixes -Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch -# For bz#1797064 - virtiofsd: Fixes -Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch -# For bz#1797064 - virtiofsd: Fixes -Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch -# For bz#1797064 - virtiofsd: Fixes -Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch -# For bz#1797064 - virtiofsd: Fixes -Patch215: kvm-virtiofsd-Fix-xattr-operations.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch217: kvm-block-Generic-file-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch218: kvm-file-posix-Drop-hdev_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch223: 
kvm-block-Introduce-bdrv_reopen_commit_post-step.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror 
-Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch233: kvm-iotests-Add-iothread-cases-to-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch -# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. -Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch -# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning -Patch236: kvm-migration-Rate-limit-inside-host-pages.patch -# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman -Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch -# For bz#1794692 - Mirror block job stops making progress -Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1794692 - Mirror block job stops making progress -Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch -# For bz#1817621 - Crash and deadlock 
with block jobs when using io-threads -Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch -# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure -Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch BuildRequires: wget BuildRequires: rpm-build @@ -691,8 +170,6 @@ BuildRequires: systemtap-sdt-devel BuildRequires: libpng-devel # For uuid generation BuildRequires: libuuid-devel -# For BlueZ device support -BuildRequires: bluez-libs-devel # For Braille device support BuildRequires: brlapi-devel # For test suite @@ -930,6 +407,7 @@ the Secure Shell (SSH) protocol. %prep %setup -n qemu-%{version} %autopatch -p1 +mkdir qemu-kvm-build %build %global buildarch %{kvm_target}-softmmu @@ -943,166 +421,174 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list %{block_drivers_list},gluster %endif -./configure \ - --prefix="%{_prefix}" \ - --libdir="%{_libdir}" \ - --sysconfdir="%{_sysconfdir}" \ - --interp-prefix=%{_prefix}/qemu-%M \ - --localstatedir="%{_localstatedir}" \ - --docdir="%{qemudocdir}" \ - --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ - --extra-cflags="%{optflags}" \ - --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-confsuffix=/"%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ -%if 0%{have_fdt} - --enable-fdt \ -%else - --disable-fdt \ - %endif -%if 0%{have_gluster} - --enable-glusterfs \ -%else - --disable-glusterfs \ -%endif - --enable-guest-agent \ -%ifnarch s390x - --enable-numa \ -%else - --disable-numa \ -%endif - --enable-rbd \ -%if 0%{have_librdma} - 
--enable-rdma \ -%else - --disable-rdma \ -%endif - --disable-pvrdma \ - --enable-seccomp \ -%if 0%{have_spice} - --enable-spice \ - --enable-smartcard \ - --enable-virglrenderer \ -%else - --disable-spice \ - --disable-smartcard \ - --disable-virglrenderer \ -%endif -%if 0%{have_opengl} - --enable-opengl \ -%else - --disable-opengl \ -%endif -%if 0%{have_usbredir} - --enable-usb-redir \ -%else - --disable-usb-redir \ -%endif - --disable-tcmalloc \ -%ifarch x86_64 - --enable-libpmem \ -%else - --disable-libpmem \ -%endif - --enable-vhost-user \ -%ifarch %{ix86} x86_64 - --enable-avx2 \ -%else - --disable-avx2 \ -%endif +cd qemu-kvm-build +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{qemudocdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-confsuffix=/"%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ --python=%{__python3} \ --target-list="%{buildarch}" \ --block-drv-rw-whitelist=%{block_drivers_list} \ --audio-drv-list= \ --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ + --with-git=git \ --tls-priority=NORMAL \ - --disable-bluez \ + --enable-attr \ + --disable-auth-pam \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%else + --disable-avx2 \ +%endif + --disable-avx512f \ + --disable-bochs \ --disable-brlapi \ + --disable-bsd-user \ + --disable-bzip2 \ --enable-cap-ng \ + --enable-capstone \ + --disable-cloop \ + --disable-cocoa \ --enable-coroutine-pool \ + --disable-crypto-afalg \ --enable-curl \ --disable-curses \ + --enable-debug-info \ + --disable-debug-mutex \ --disable-debug-tcg \ + --disable-dmg \ --enable-docs \ +%if 0%{have_fdt} + --enable-fdt \ +%else + --disable-fdt \ + %endif + --enable-gcrypt \ + --disable-git-update \ 
+%if 0%{have_gluster} + --enable-glusterfs \ +%else + --disable-glusterfs \ +%endif + --enable-gnutls \ --disable-gtk \ + --enable-guest-agent \ + --disable-guest-agent-msi \ + --disable-hax \ + --disable-hvf \ + --enable-iconv \ + --disable-jemalloc \ --enable-kvm \ --enable-libiscsi \ --disable-libnfs \ +%ifarch x86_64 + --enable-libpmem \ +%else + --disable-libpmem \ +%endif --enable-libssh \ --enable-libusb \ - --disable-bzip2 \ + --disable-libxml2 \ --enable-linux-aio \ - --disable-live-block-migration \ - --enable-lzo \ - --enable-pie \ - --disable-qom-cast-debug \ - --disable-sdl \ - --enable-snappy \ - --disable-sparse \ - --disable-strip \ - --enable-tpm \ - --enable-trace-backend=dtrace \ - --disable-vde \ - --disable-vhost-scsi \ - --disable-vxhs \ - --disable-virtfs \ - --disable-vnc-jpeg \ - --disable-vte \ - --enable-vnc-png \ - --enable-vnc-sasl \ - --enable-werror \ - --disable-xen \ - --disable-xfsctl \ - --enable-gnutls \ - --enable-gcrypt \ - --disable-nettle \ - --enable-attr \ - --disable-bsd-user \ - --disable-cocoa \ - --enable-debug-info \ - --disable-guest-agent-msi \ - --disable-hax \ - --disable-jemalloc \ + --disable-linux-io-uring \ --disable-linux-user \ - --enable-modules \ - --disable-netmap \ - --disable-replication \ - --enable-system \ - --enable-tools \ - --disable-user \ - --enable-vhost-net \ - --enable-vhost-vsock \ - --enable-vnc \ - --enable-mpath \ - --disable-xen-pci-passthrough \ - --enable-tcg \ - --with-git=git \ - --disable-sanitizers \ - --disable-hvf \ - --disable-whpx \ + --disable-live-block-migration \ + --disable-lzfse \ + --enable-lzo \ --enable-malloc-trim \ --disable-membarrier \ - --disable-vhost-crypto \ - --disable-libxml2 \ - --enable-capstone \ - --disable-git-update \ - --disable-crypto-afalg \ - --disable-debug-mutex \ - --disable-bochs \ - --disable-cloop \ - --disable-dmg \ - --disable-qcow1 \ - --disable-vdi \ - --disable-vvfat \ - --disable-qed \ + --enable-modules \ + --disable-module-upgrades \ + 
--enable-mpath \ + --disable-netmap \ + --disable-nettle \ +%ifnarch s390x + --enable-numa \ +%else + --disable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%else + --disable-opengl \ +%endif --disable-parallels \ + --enable-pie \ + --disable-pvrdma \ + --disable-qcow1 \ + --disable-qed \ + --disable-qom-cast-debug \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%else + --disable-rdma \ +%endif + --disable-replication \ + --disable-sanitizers \ + --disable-sdl \ + --disable-sdl-image \ + --enable-seccomp \ --disable-sheepdog \ - --disable-auth-pam \ - --enable-iconv \ - --disable-lzfse \ + --enable-snappy \ + --disable-sparse \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ +%else + --disable-smartcard \ + --disable-spice \ +%endif + --disable-strip \ + --enable-system \ + --enable-tcg \ + --disable-tcmalloc \ + --enable-tools \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%else + --disable-usb-redir \ +%endif + --disable-user \ + --disable-vde \ + --disable-vdi \ + --disable-vhost-crypto \ --enable-vhost-kernel \ + --enable-vhost-net \ + --disable-vhost-scsi \ + --enable-vhost-user \ + --enable-vhost-vsock \ +%if 0%{have_spice} + --enable-virglrenderer \ +%else + --disable-virglrenderer \ +%endif + --disable-virtfs \ + --enable-vnc \ + --disable-vnc-jpeg \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --disable-vte \ + --disable-vvfat \ + --disable-vxhs \ + --enable-werror \ + --disable-whpx \ + --disable-xen \ + --disable-xen-pci-passthrough \ + --disable-xfsctl \ + --enable-xkbcommon \ --without-default-devices echo "config-host.mak contents:" @@ -1131,6 +617,7 @@ gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check %install +cd qemu-kvm-build %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d @@ -1175,13 +662,13 @@ cp -R tests/acceptance/* 
$RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp -install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ +install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ # Install qemu-iotests cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ # Avoid ambiguous 'python' interpreter name find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python3+%{__python3}+' {} \; find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/python+%{__python3}+' {} \; install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README @@ -1235,7 +722,6 @@ rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp # Install simpletrace install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py # Avoid ambiguous 'python' interpreter name -sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend @@ -1244,11 +730,11 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t 
${RPM_BUILD_ROOT}%{qemudocdir} ../Changelog ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* -install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf # Provided by package openbios rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc @@ -1276,6 +762,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin @@ -1305,6 +792,9 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server +# Remove qemu-storage-daemon +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/qemu-storage-daemon + # Remove efi roms rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom @@ -1378,11 +868,15 @@ chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs %check +cd qemu-kvm-build export DIFF=diff; make check V=1 %post -n qemu-kvm-core @@ -1431,18 +925,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %dir %{qemudocdir} %doc %{qemudocdir}/Changelog %doc %{qemudocdir}/README.rst -%doc %{qemudocdir}/qemu-doc.html %doc 
%{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB %doc %{qemudocdir}/LICENSE %doc %{qemudocdir}/README.systemtap %doc %{qemudocdir}/qmp-spec.txt -%doc %{qemudocdir}/qemu-doc.txt %doc %{qemudocdir}/qemu-ga-ref.html %doc %{qemudocdir}/qemu-ga-ref.txt %doc %{qemudocdir}/qemu-qmp-ref.html %doc %{qemudocdir}/qemu-qmp-ref.txt %doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap @@ -1452,6 +948,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ @@ -1576,6 +1073,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 +- Temporary rebase of qemu-kvm to 5.0.0 + * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 - kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] - Resolves: bz#1822682 diff --git a/sources b/sources index 46350e1..23b2923 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.2.0.tar.xz) = 2a79973c2b07c53e8c57a808ea8add7b6b2cbca96488ed5d4b669ead8c9318907dec2b6109f180fc8ca8f04c0f73a56e82b3a527b5626b799d7e849f2474ec56 +SHA512 (qemu-5.0.0.tar.xz) = 34c87dfc56c5a63c7649cdc1281cb742e7665e9f3fe2c1dfc1c6b3abf0ca937a2b8a0d4d8894060f6f3e03f4ba6616a11097c48b32db2cbc8925f87255b4acb5